xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision a7dea1671b87c07d2d266f836bfa8b58efc7c134)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// Group template arguments that can be derived from the vector type (EltNum x
16// EltVT).  These are things like the register class for the writemask, etc.
17// The idea is to pass one of these as the template argument rather than the
18// individual arguments.
19// The template is also used for scalar types, in this case numelts is 1.
20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                      string suffix = ""> {
22  RegisterClass RC = rc;
23  ValueType EltVT = eltvt;
24  int NumElts = numelts;
25
26  // Corresponding mask register class.
27  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
28
29  // Corresponding mask register pair class.
  // Only defined for NumElts <= 16; left unset (?) otherwise.
30  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
32
33  // Corresponding write-mask register class.
34  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
35
36  // The mask VT.
37  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
38
39  // Suffix used in the instruction mnemonic.
40  string Suffix = suffix;
41
42  // VTName is a string name for vector VT. For vector types it will be
43  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44  // It is a little bit complex for scalar types, where NumElts = 1.
45  // In this case we build v4f32 or v2f64
46  string VTName = "v" # !if (!eq (NumElts, 1),
47                        !if (!eq (EltVT.Size, 32), 4,
48                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
49
50  // The vector VT.
51  ValueType VT = !cast<ValueType>(VTName);
52
  // Textual element type name, e.g. "i32" or "f64".
53  string EltTypeName = !cast<string>(EltVT);
54  // Size of the element type in bits, e.g. 32 for v16i32.
55  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
56  int EltSize = EltVT.Size;
57
58  // "i" for integer types and "f" for floating-point types
59  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
60
61  // Size of RC in bits, e.g. 512 for VR512.
62  int Size = VT.Size;
63
64  // The corresponding memory operand, e.g. i512mem for VR512.
65  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, e.g. i32mem.
66  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
67  // FP scalar memory operand for intrinsics - ssmem/sdmem.
68  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
69                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
70
71  // Load patterns
72  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
73
74  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
75
76  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  // Broadcast-from-memory pattern fragment for this element size.
77  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
78
  // Complex pattern matching a scalar FP load for intrinsic forms;
  // unset (?) for integer element types.
79  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
80                                          !cast<ComplexPattern>("sse_load_f32"),
81                                    !if (!eq (EltTypeName, "f64"),
82                                          !cast<ComplexPattern>("sse_load_f64"),
83                                    ?));
84
85  // The string to specify embedded broadcast in assembly.
86  string BroadcastStr = "{1to" # NumElts # "}";
87
88  // 8-bit compressed displacement tuple/subvector format.  This is only
89  // defined for NumElts <= 8.
90  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
92
  // Subregister index for extracting this VT from a ZMM register;
  // only meaningful for the 128/256-bit cases.
93  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94                          !if (!eq (Size, 256), sub_ymm, ?));
95
  // Execution domain used for domain-crossing penalties/fixups.
96  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
98                     SSEPackedInt));
99
  // Scalar FP register class matching the element type.
100  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
101
  // Convenience dag for an all-zeros vector of this VT (zero-masking).
102  dag ImmAllZerosV = (VT immAllZerosV);
103
  // Suffix appended to instruction names to pick the EVEX vector length.
104  string ZSuffix = !if (!eq (Size, 128), "Z128",
105                   !if (!eq (Size, 256), "Z256", "Z"));
106}
107
// 512-bit vector type descriptors (RC = VR512).
108def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
109def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
110def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
111def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
112def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
113def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
114
115// "x" in v32i8x_info means RC = VR256X
116def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
117def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
118def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
119def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
120def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
121def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
122
// 128-bit vector type descriptors (RC = VR128X).
123def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
124def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
125def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
126def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
127def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
128def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
129
130// We map scalar types to the smallest (128-bit) vector type
131// with the appropriate element type. This allows to use the same masking logic.
132def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
133def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
134def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
135def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
136
// Bundles the 512/256/128-bit descriptors of one element type so a single
// template argument can drive EVEX_V512/V256/V128 instantiations (VL forms).
137class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
138                           X86VectorVTInfo i128> {
139  X86VectorVTInfo info512 = i512;
140  X86VectorVTInfo info256 = i256;
141  X86VectorVTInfo info128 = i128;
142}
143
// Per-element-type triples of (512, 256, 128)-bit vector descriptors.
144def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
145                                             v16i8x_info>;
146def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
147                                             v8i16x_info>;
148def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
149                                             v4i32x_info>;
150def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
151                                             v2i64x_info>;
152def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
153                                             v4f32x_info>;
154def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
155                                             v2f64x_info>;
156
// Describes a mask (k-register) vector type: its register class, the
// corresponding write-mask register class, and the vNi1 value type.
157class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
158                       ValueType _vt> {
159  RegisterClass KRC = _krc;
160  RegisterClass KRCWM = _krcwm;
161  ValueType KVT = _vt;
162}
163
// Mask type descriptors for every supported mask width (1..64 bits).
164def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
165def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
166def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
167def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
168def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
169def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
170def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
171
172// This multiclass generates the masking variants from the non-masking
173// variant.  It only provides the assembly pieces for the masking variants.
174// It assumes custom ISel patterns for masking which can be provided as
175// template arguments.
// Produces three records: NAME (unmasked), NAME#k (merge-masking, EVEX.z=0)
// and NAME#kz (zero-masking, EVEX.z=1).
176multiclass AVX512_maskable_custom<bits<8> O, Format F,
177                                  dag Outs,
178                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
179                                  string OpcodeStr,
180                                  string AttSrcAsm, string IntelSrcAsm,
181                                  list<dag> Pattern,
182                                  list<dag> MaskingPattern,
183                                  list<dag> ZeroMaskingPattern,
184                                  string MaskingConstraint = "",
185                                  bit IsCommutable = 0,
186                                  bit IsKCommutable = 0,
187                                  bit IsKZCommutable = IsCommutable> {
  // Unmasked form.
188  let isCommutable = IsCommutable in
189    def NAME: AVX512<O, F, Outs, Ins,
190                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
191                                     "$dst, "#IntelSrcAsm#"}",
192                       Pattern>;
193
  // Merge-masking form: writes "$dst {${mask}}" in the assembly string.
194  // Prefer over VMOV*rrk Pat<>
195  let isCommutable = IsKCommutable in
196    def NAME#k: AVX512<O, F, Outs, MaskingIns,
197                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
198                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
199                       MaskingPattern>,
200              EVEX_K {
201      // In case of the 3src subclass this is overridden with a let.
202      string Constraints = MaskingConstraint;
203    }
204
  // Zero-masking form: "$dst {${mask}} {z}".
205  // Zero mask does not add any restrictions to commute operands transformation.
206  // So, it is Ok to use IsCommutable instead of IsKCommutable.
207  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
208    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
209                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
210                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
211                       ZeroMaskingPattern>,
212              EVEX_KZ;
213}
214
215
216// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three select-based ISel patterns from RHS/MaskingRHS and
// forwards them to AVX512_maskable_custom.  The zero-masking pattern
// selects between RHS and an all-zeros vector of the result type.
217multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
218                                  dag Outs,
219                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
220                                  string OpcodeStr,
221                                  string AttSrcAsm, string IntelSrcAsm,
222                                  dag RHS, dag MaskingRHS,
223                                  SDNode Select = vselect,
224                                  string MaskingConstraint = "",
225                                  bit IsCommutable = 0,
226                                  bit IsKCommutable = 0,
227                                  bit IsKZCommutable = IsCommutable> :
228  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
229                         AttSrcAsm, IntelSrcAsm,
230                         [(set _.RC:$dst, RHS)],
231                         [(set _.RC:$dst, MaskingRHS)],
232                         [(set _.RC:$dst,
233                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
234                         MaskingConstraint, IsCommutable,
235                         IsKCommutable, IsKZCommutable>;
236
237// This multiclass generates the unconditional/non-masking, the masking and
238// the zero-masking variant of the vector instruction.  In the masking case, the
239// preserved vector elements come from a new dummy input operand tied to $dst.
240// This version uses a separate dag for non-masking and masking.
240a// (RHS drives the unmasked pattern; MaskRHS drives both masked patterns.)
241multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
242                           dag Outs, dag Ins, string OpcodeStr,
243                           string AttSrcAsm, string IntelSrcAsm,
244                           dag RHS, dag MaskRHS,
245                           bit IsCommutable = 0, bit IsKCommutable = 0,
246                           SDNode Select = vselect> :
247   AVX512_maskable_custom<O, F, Outs, Ins,
                          // Merge-masking ins: pass-through $src0 tied to $dst plus the mask.
248                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          // Zero-masking ins: just the mask.
249                          !con((ins _.KRCWM:$mask), Ins),
250                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
251                          [(set _.RC:$dst, RHS)],
252                          [(set _.RC:$dst,
253                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
254                          [(set _.RC:$dst,
255                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
256                          "$src0 = $dst", IsCommutable, IsKCommutable>;
257
258// This multiclass generates the unconditional/non-masking, the masking and
259// the zero-masking variant of the vector instruction.  In the masking case, the
260// preserved vector elements come from a new dummy input operand tied to $dst.
261multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
262                           dag Outs, dag Ins, string OpcodeStr,
263                           string AttSrcAsm, string IntelSrcAsm,
264                           dag RHS,
265                           bit IsCommutable = 0, bit IsKCommutable = 0,
266                           bit IsKZCommutable = IsCommutable,
267                           SDNode Select = vselect> :
268   AVX512_maskable_common<O, F, _, Outs, Ins,
                          // Merge-masking ins: pass-through $src0 tied to $dst plus the mask.
269                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
270                          !con((ins _.KRCWM:$mask), Ins),
271                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
272                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
273                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
274                          IsKZCommutable>;
275
276// This multiclass generates the unconditional/non-masking, the masking and
277// the zero-masking variant of the scalar instruction.
// Same as AVX512_maskable but selects with X86selects (scalar element 0 only)
// and disables all commutation.
278multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
279                           dag Outs, dag Ins, string OpcodeStr,
280                           string AttSrcAsm, string IntelSrcAsm,
281                           dag RHS> :
282   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
283                   RHS, 0, 0, 0, X86selects>;
284
285// Similar to AVX512_maskable but in this case one of the source operands
286// ($src1) is already tied to $dst so we just use that for the preserved
287// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
288// $src1.
// If MaskOnly is set, the unmasked pattern is suppressed (null_frag).
289multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
290                                dag Outs, dag NonTiedIns, string OpcodeStr,
291                                string AttSrcAsm, string IntelSrcAsm,
292                                dag RHS,
293                                bit IsCommutable = 0,
294                                bit IsKCommutable = 0,
295                                SDNode Select = vselect,
296                                bit MaskOnly = 0> :
297   AVX512_maskable_common<O, F, _, Outs,
298                          !con((ins _.RC:$src1), NonTiedIns),
299                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
300                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
301                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
302                          !if(MaskOnly, (null_frag), RHS),
                          // Preserved elements come from the tied $src1, not a
                          // separate $src0 — hence the empty constraint string.
303                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
304                          Select, "", IsCommutable, IsKCommutable>;
305
306// Similar to AVX512_maskable_3src but in this case the input VT for the tied
307// operand differs from the output VT. This requires a bitconvert on
308// the preserved vector going into the vselect.
309// NOTE: The unmasked pattern is disabled.
310multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
311                                     X86VectorVTInfo InVT,
312                                     dag Outs, dag NonTiedIns, string OpcodeStr,
313                                     string AttSrcAsm, string IntelSrcAsm,
314                                     dag RHS, bit IsCommutable = 0> :
315   AVX512_maskable_common<O, F, OutVT, Outs,
316                          !con((ins InVT.RC:$src1), NonTiedIns),
317                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
318                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          // (null_frag) disables the unmasked ISel pattern.
319                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
320                          (vselect InVT.KRCWM:$mask, RHS,
321                           (bitconvert InVT.RC:$src1)),
322                           vselect, "", IsCommutable>;
323
// Scalar flavor of AVX512_maskable_3src: identical except that masking
// selects with X86selects (element 0 only) instead of vselect.
324multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
325                                     dag Outs, dag NonTiedIns, string OpcodeStr,
326                                     string AttSrcAsm, string IntelSrcAsm,
327                                     dag RHS,
328                                     bit IsCommutable = 0,
329                                     bit IsKCommutable = 0,
330                                     bit MaskOnly = 0> :
331   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
332                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
333                        X86selects, MaskOnly>;
334
// Generates the masked/zero-masked assembly variants only: a single custom
// Pattern is used for the unmasked form and the masked forms carry no
// ISel patterns (assembler/disassembler support only).
335multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
336                                  dag Outs, dag Ins,
337                                  string OpcodeStr,
338                                  string AttSrcAsm, string IntelSrcAsm,
339                                  list<dag> Pattern> :
340   AVX512_maskable_custom<O, F, Outs, Ins,
341                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
342                          !con((ins _.KRCWM:$mask), Ins),
343                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
344                          "$src0 = $dst">;
345
// 3-src counterpart of AVX512_maskable_in_asm: $src1 is tied to $dst (so no
// extra constraint string) and the masked forms carry no ISel patterns.
346multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
347                                       dag Outs, dag NonTiedIns,
348                                       string OpcodeStr,
349                                       string AttSrcAsm, string IntelSrcAsm,
350                                       list<dag> Pattern> :
351   AVX512_maskable_custom<O, F, Outs,
352                          !con((ins _.RC:$src1), NonTiedIns),
353                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
354                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
355                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
356                          "">;
357
358// Instruction with mask that puts result in mask register,
359// like "compare" and "vptest"
// Only unmasked (NAME) and merge-masked (NAME#k) forms are generated;
// there is no zero-masking variant for mask-producing instructions.
360multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
361                                  dag Outs,
362                                  dag Ins, dag MaskingIns,
363                                  string OpcodeStr,
364                                  string AttSrcAsm, string IntelSrcAsm,
365                                  list<dag> Pattern,
366                                  list<dag> MaskingPattern,
367                                  bit IsCommutable = 0> {
368    let isCommutable = IsCommutable in {
369    def NAME: AVX512<O, F, Outs, Ins,
370                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
371                                     "$dst, "#IntelSrcAsm#"}",
372                       Pattern>;
373
374    def NAME#k: AVX512<O, F, Outs, MaskingIns,
375                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
376                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
377                       MaskingPattern>, EVEX_K;
378    }
379}
380
// Wraps AVX512_maskable_custom_cmp, building the set-into-KRC patterns
// from the supplied RHS/MaskingRHS dags.
381multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
382                                  dag Outs,
383                                  dag Ins, dag MaskingIns,
384                                  string OpcodeStr,
385                                  string AttSrcAsm, string IntelSrcAsm,
386                                  dag RHS, dag MaskingRHS,
387                                  bit IsCommutable = 0> :
388  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
389                         AttSrcAsm, IntelSrcAsm,
390                         [(set _.KRC:$dst, RHS)],
391                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
392
// Convenience wrapper: the masked pattern is (and $mask, RHS_su), i.e. the
// write-mask simply ANDs the compare result.  RHS_su is typically a
// single-use variant of RHS to keep folding profitable.
393multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
394                           dag Outs, dag Ins, string OpcodeStr,
395                           string AttSrcAsm, string IntelSrcAsm,
396                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
397   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
398                          !con((ins _.KRCWM:$mask), Ins),
399                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
400                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
401
402
403// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
404// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
405// swizzled by ExecutionDomainFix to pxor.
406// We set canFoldAsLoad because this can be converted to a constant-pool
407// load of an all-zeros value if folding it would be beneficial.
408let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
409    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// 512-bit all-zeros pseudo.
410def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
411               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
// 512-bit all-ones pseudo.
412def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
413               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
414}
415
// Map the remaining 512-bit all-zeros types onto the v16i32 pseudo.
416let Predicates = [HasAVX512] in {
417def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
418def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
419def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
420def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
421def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
422}
423
424// Alias instructions that allow VPTERNLOG to be used with a mask to create
425// a mix of all ones and all zeros elements. This is done this way to force
426// the same register to be used as input for all three sources.
427let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 16 x i32 elements: -1 where the mask bit is set, 0 elsewhere.
428def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
429                                (ins VK16WM:$mask), "",
430                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
431                                                      (v16i32 immAllOnesV),
432                                                      (v16i32 immAllZerosV)))]>;
// 8 x i64 elements: -1 where the mask bit is set, 0 elsewhere.
433def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
434                                (ins VK8WM:$mask), "",
435                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
436                                           (v8i64 immAllOnesV),
437                                           (v8i64 immAllZerosV)))]>;
438}
439
// 128/256-bit all-zeros pseudos, the VLX counterparts of AVX512_512_SET0.
440let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
441    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
442def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
443               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
444def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
445               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
446}
447
// Map the remaining 128/256-bit all-zeros types onto the v4i32/v8i32 pseudos.
448let Predicates = [HasAVX512] in {
449def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
450def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
451def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
452def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
453def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
454def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
455def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
456def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
457def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
458def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
459}
460
461// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
462// This is expanded by ExpandPostRAPseudos.
463let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
464    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  // Scalar f32 / f64 and f128 zero pseudos for the AVX-512 register classes.
465  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
466                          [(set FR32X:$dst, fp32imm0)]>;
467  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
468                          [(set FR64X:$dst, fp64imm0)]>;
469  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
470                            [(set VR128X:$dst, fp128imm0)]>;
471}
472
473//===----------------------------------------------------------------------===//
474// AVX-512 - VECTOR INSERT
475//
476
477// Supports two different pattern operators for mask and unmasked ops. Allows
478// null_frag to be passed for one.
// Generates register (rr) and memory (rm) VINSERTx forms for inserting a
// From-sized subvector into a To-sized vector at an immediate lane index.
479multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
480                                  X86VectorVTInfo To,
481                                  SDPatternOperator vinsert_insert,
482                                  SDPatternOperator vinsert_for_mask,
483                                  X86FoldableSchedWrite sched> {
484  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form, e.g. "vinsertf32x4 $src3, $src2, $src1".
485    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
486                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
487                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
488                   "$src3, $src2, $src1", "$src1, $src2, $src3",
489                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
490                                         (From.VT From.RC:$src2),
491                                         (iPTR imm)),
492                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
493                                           (From.VT From.RC:$src2),
494                                           (iPTR imm))>,
495                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: $src2 is loaded from memory; uses compressed disp8 scaling.
496    let mayLoad = 1 in
497    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
498                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
499                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
500                   "$src3, $src2, $src1", "$src1, $src2, $src3",
501                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
502                               (From.VT (From.LdFrag addr:$src2)),
503                               (iPTR imm)),
504                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
505                               (From.VT (From.LdFrag addr:$src2)),
506                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
507                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
508                   Sched<[sched.Folded, sched.ReadAfterFold]>;
509  }
510}
511
512// Passes the same pattern operator for masked and unmasked ops.
513multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
514                            X86VectorVTInfo To,
515                            SDPatternOperator vinsert_insert,
516                            X86FoldableSchedWrite sched> :
517  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
518
// Adds codegen-only patterns that lower subvector inserts of alternative
// element types onto an already-defined instruction (InstrStr#"rr"/"rm").
// INSERT_get_vinsert_imm rescales the insert index into the instruction's
// immediate.
519multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
520                       X86VectorVTInfo To, PatFrag vinsert_insert,
521                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
522  let Predicates = p in {
    // Register source.
523    def : Pat<(vinsert_insert:$ins
524                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
525              (To.VT (!cast<Instruction>(InstrStr#"rr")
526                     To.RC:$src1, From.RC:$src2,
527                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
528
    // Memory source.
529    def : Pat<(vinsert_insert:$ins
530                  (To.VT To.RC:$src1),
531                  (From.VT (From.LdFrag addr:$src2)),
532                  (iPTR imm)),
533              (To.VT (!cast<Instruction>(InstrStr#"rm")
534                  To.RC:$src1, addr:$src2,
535                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
536  }
537}
538
// Instantiates the full family of VINSERT instructions for one FP/int type
// pair: 32x4/64x4 base forms plus the DQI-only 64x2/32x8 forms (the latter
// kept mask-only so unmasked inserts go through the base encodings).
539multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
540                            ValueType EltVT64, int Opcode256,
541                            X86FoldableSchedWrite sched> {
542
543  let Predicates = [HasVLX] in
544    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
545                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
546                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
547                                 vinsert128_insert, sched>, EVEX_V256;
548
549  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
550                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
551                                 X86VectorVTInfo<16, EltVT32, VR512>,
552                                 vinsert128_insert, sched>, EVEX_V512;
553
554  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
555                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
556                                 X86VectorVTInfo< 8, EltVT64, VR512>,
557                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;
558
559  // Even with DQI we'd like to only use these instructions for masking.
560  let Predicates = [HasVLX, HasDQI] in
561    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
562                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
563                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
564                                   null_frag, vinsert128_insert, sched>,
565                                   VEX_W1X, EVEX_V256;
566
567  // Even with DQI we'd like to only use these instructions for masking.
568  let Predicates = [HasDQI] in {
569    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
570                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
571                                 X86VectorVTInfo< 8, EltVT64, VR512>,
572                                 null_frag, vinsert128_insert, sched>,
573                                 VEX_W, EVEX_V512;
574
575    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
576                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
577                                   X86VectorVTInfo<16, EltVT32, VR512>,
578                                   null_frag, vinsert256_insert, sched>,
579                                   EVEX_V512;
580  }
581}
582
583// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// FP and integer VINSERT families (opcodes 0x18/0x1a and 0x38/0x3a).
584defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
585defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
586
587// Codegen pattern with the alternative types,
588// Even with AVX512DQ we'll still use these for unmasked operations.
589defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
590              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
591defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
592              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
593
594defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
595              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
596defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
597              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
598
599defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
600              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
601defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
602              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
603
604// Codegen pattern with the alternative types insert VEC128 into VEC256
605defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
606              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
607defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
608              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
609// Codegen pattern with the alternative types insert VEC128 into VEC512
610defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
611              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
612defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
613               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
614// Codegen pattern with the alternative types insert VEC256 into VEC512
615defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
616              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
617defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
618              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
619
620
// Additional patterns for handling a bitcast between the vselect and the
// insert_subvector: the mask's element type (Cast) differs from the type the
// insert is performed in (From/To), e.g. a v8f32-masked select wrapped around
// a f64x2 insert. The bitcast is folded away and the masked form of the
// named instruction is used directly.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Register-register insert, merge-masking with $src0.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Register-memory insert, merge-masking. The loaded subvector may itself
  // be wrapped in a bitcast, which is also absorbed by the fold.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Register-register insert, zero-masking.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Register-memory insert, zero-masking.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
672
// Instantiations of vinsert_for_mask_cast for every supported combination of
// masked-select element type vs. insert element type. Grouped by destination
// width: 128-into-256 (Z256, VLX), then 128-into-512 (Z), then 256-into-512.
// The x8/x2 forms additionally require AVX512DQ.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// 128-bit inserts into 512-bit destinations.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// 256-bit inserts into 512-bit destinations.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
750
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
// Register form. isCommutable refers to the X86insertps node's operand
// handling; the immediate selects source/destination lanes and zero mask.
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: loads a single f32 and treats it as element 0 of a v4f32
// (scalar_to_vector) before performing the insert.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
768
769//===----------------------------------------------------------------------===//
770// AVX-512 VECTOR EXTRACT
771//---
772
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: AVX512_maskable_split generates the unmasked, merge-
    // masked and zero-masked variants, using vextract_extract for the
    // unmasked pattern and vextract_for_mask for the masked ones.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Unmasked extract-to-memory form, with a store pattern.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked extract-to-memory form. No pattern: selected manually when a
    // masked store of an extract is needed.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
809
// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper over vextract_for_size_split for instructions that use
// one pattern operator regardless of masking.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
816
// Codegen pattern for the alternative types.
// Maps an extract_subvector of From/To types onto an existing VEXTRACT*
// instruction (register and store forms), so one instruction covers extracts
// of several element types with the same bit pattern.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     // Register-register extract.
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract folded into a store.
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
832
// Instantiates the whole VEXTRACT family for one FP-or-int element-type pair:
// 32x4/64x4 forms under plain AVX512, 32x4 from YMM under VLX, and the DQI
// 64x2/32x8 forms (which get patterns only for the masked variants).
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // null_frag suppresses the unmasked pattern; unmasked extracts are handled
  // by the 32x4 forms via vextract_for_size_lowering.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
877
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Instantiate the packed-FP and packed-int subvector-extract families from
// vextract_for_type (opcodes 0x19/0x39 for 128-bit, 0x1b/0x3b for 256-bit).
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
881
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// Each defm reuses an existing VEXTRACT* instruction for extracts whose
// element type differs but whose bit pattern is identical.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
915
916
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Each pattern first narrows the 512-bit source to its lower YMM half
// (EXTRACT_SUBREG with sub_ymm), then extracts lane 1 with the AVX
// VEXTRACT*128 instruction, which can be encoded with a VEX prefix.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
945
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Same idea as the NoVLX block above, but with VLX available the VLX
// 256-bit-source forms (VEXTRACT{F,I}32x4Z256rr) are used instead of the
// AVX VEXTRACT*128 instructions.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
974
975
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector: the mask's element type (Cast) differs from the
// extract's element type (From/To); the bitcast is folded into the masked
// form of the named VEXTRACT instruction. Register forms only.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking with $src0.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1003
// Instantiations of vextract_for_mask_cast for every supported combination of
// masked-select element type vs. extract element type. Grouped by source
// width: 128-from-256 (Z256, VLX), then 128-from-512 (Z), then 256-from-512.
// The x8/x2 forms additionally require AVX512DQ.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extracts from 512-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// 256-bit extracts from 512-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1081
// vextractps - extract 32 bits from XMM
// Register form: the source is viewed as v4i32 (bc_v4i32) and the selected
// element is moved to a 32-bit GPR.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

// Memory form: stores the selected 32-bit element directly to memory.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1095
1096//===---------------------------------------------------------------------===//
1097// AVX-512 BROADCAST
1098//---
// broadcast with a scalar argument.
// Patterns that select the register form of a broadcast instruction when the
// source is a scalar FP register (FRC). COPY_TO_REGCLASS re-views the scalar
// register as a vector register so the vector-source instruction applies.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            string Name,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Merge-masked broadcast with $src0 as the pass-through.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Zero-masked broadcast.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
1118
1119// Split version to allow mask and broadcast node to be different types. This
1120// helps support the 32x2 broadcasts.
// The masked patterns select in MaskInfo's vector type and bitconvert the
// DestInfo-typed broadcast result, so the writemask element type may differ
// from the broadcast element type. UnmaskedOp/UnmaskedBcastOp default to the
// plain broadcast nodes; instantiations may pass null_frag for either to
// suppress the corresponding unmasked patterns while still defining the
// instructions themselves.
1121multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1122                                     string Name,
1123                                     SchedWrite SchedRR, SchedWrite SchedRM,
1124                                     X86VectorVTInfo MaskInfo,
1125                                     X86VectorVTInfo DestInfo,
1126                                     X86VectorVTInfo SrcInfo,
1127                                     bit IsConvertibleToThreeAddress,
1128                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1129                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Register source, unmasked.
1130  let hasSideEffects = 0 in
1131  def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1132                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1133                   [(set MaskInfo.RC:$dst,
1134                     (MaskInfo.VT
1135                      (bitconvert
1136                       (DestInfo.VT
1137                        (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1138                   DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Register source, zero-masking ({z} writemask) form.
1139  def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1140                     (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1141                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1142                      "${dst} {${mask}} {z}, $src}"),
1143                      [(set MaskInfo.RC:$dst,
1144                        (vselect MaskInfo.KRCWM:$mask,
1145                         (MaskInfo.VT
1146                          (bitconvert
1147                           (DestInfo.VT
1148                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1149                         MaskInfo.ImmAllZerosV))],
1150                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Register source, merge-masking form; $src0 is the pass-through value and
  // is tied to $dst.
1151  let Constraints = "$src0 = $dst" in
1152  def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1153                    (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1154                         SrcInfo.RC:$src),
1155                    !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1156                    "${dst} {${mask}}, $src}"),
1157                    [(set MaskInfo.RC:$dst,
1158                      (vselect MaskInfo.KRCWM:$mask,
1159                       (MaskInfo.VT
1160                        (bitconvert
1161                         (DestInfo.VT
1162                          (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1163                       MaskInfo.RC:$src0))],
1164                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1165
  // Scalar-memory source, unmasked broadcast load.
1166  let hasSideEffects = 0, mayLoad = 1 in
1167  def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1168                   (ins SrcInfo.ScalarMemOp:$src),
1169                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1170                   [(set MaskInfo.RC:$dst,
1171                     (MaskInfo.VT
1172                      (bitconvert
1173                       (DestInfo.VT
1174                        (UnmaskedBcastOp addr:$src)))))],
1175                   DestInfo.ExeDomain>, T8PD, EVEX,
1176                   EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1177
  // Memory source, zero-masking form.
1178  def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1179                     (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1180                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1181                      "${dst} {${mask}} {z}, $src}"),
1182                      [(set MaskInfo.RC:$dst,
1183                        (vselect MaskInfo.KRCWM:$mask,
1184                         (MaskInfo.VT
1185                          (bitconvert
1186                           (DestInfo.VT
1187                            (SrcInfo.BroadcastLdFrag addr:$src)))),
1188                         MaskInfo.ImmAllZerosV))],
1189                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1190                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1191
  // Memory source, merge-masking form. Three-address convertibility is
  // caller-controlled via the IsConvertibleToThreeAddress template parameter.
1192  let Constraints = "$src0 = $dst",
1193      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1194  def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1195                    (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1196                         SrcInfo.ScalarMemOp:$src),
1197                    !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1198                    "${dst} {${mask}}, $src}"),
1199                    [(set MaskInfo.RC:$dst,
1200                      (vselect MaskInfo.KRCWM:$mask,
1201                       (MaskInfo.VT
1202                        (bitconvert
1203                         (DestInfo.VT
1204                          (SrcInfo.BroadcastLdFrag addr:$src)))),
1205                       MaskInfo.RC:$src0))],
1206                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1207                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1208}
1209
1210// Helper class to force mask and broadcast result to same type.
// Thin wrapper over avx512_broadcast_rm_split that passes DestInfo for both
// MaskInfo and DestInfo, so the bitconvert in the split patterns is a no-op.
1211multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1212                               SchedWrite SchedRR, SchedWrite SchedRM,
1213                               X86VectorVTInfo DestInfo,
1214                               X86VectorVTInfo SrcInfo,
1215                               bit IsConvertibleToThreeAddress> :
1216  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1217                            DestInfo, DestInfo, SrcInfo,
1218                            IsConvertibleToThreeAddress>;
1219
// VBROADCASTSD-style FP broadcast from a 128-bit vector (low f64 element) or
// from memory. Only 512-bit (Z) and 256-bit (Z256) variants are defined here;
// no 128-bit variant exists for a double-element broadcast.
1220multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1221                                                       AVX512VLVectorVTInfo _> {
1222  let Predicates = [HasAVX512] in {
1223    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1224                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1225              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1226                                      _.info128>,
1227              EVEX_V512;
1228  }
1229
1230  let Predicates = [HasVLX] in {
1231    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1232                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1233                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1234                                         _.info128>,
1235                 EVEX_V256;
1236  }
1237}
1238
// VBROADCASTSS-style FP broadcast from a 128-bit vector (low f32 element) or
// from memory. Unlike the _sd version, this also defines a 128-bit (Z128)
// variant under HasVLX.
1239multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1240                                                       AVX512VLVectorVTInfo _> {
1241  let Predicates = [HasAVX512] in {
1242    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1243                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1244              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1245                                      _.info128>,
1246              EVEX_V512;
1247  }
1248
1249  let Predicates = [HasVLX] in {
1250    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1251                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1252                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1253                                         _.info128>,
1254                 EVEX_V256;
1255    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1256                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1257                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1258                                         _.info128>,
1259                 EVEX_V128;
1260  }
1261}
// VBROADCASTSD needs VEX_W1X (W=1 in EVEX, W-ignored in the VEX-convertible
// encoding) to select 64-bit elements.
1262defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1263                                       avx512vl_f32_info>;
1264defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1265                                       avx512vl_f64_info>, VEX_W1X;
1266
// Integer broadcast from a general-purpose register (vpbroadcastd/q r-forms).
// AVX512_maskable provides the unmasked/merge/zero-masked variants and their
// vselect patterns from the single OpNode pattern given here.
1267multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1269                                    RegisterClass SrcRC> {
1270  let ExeDomain = _.ExeDomain in
1271  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1272                         (ins SrcRC:$src),
1273                         "vpbroadcast"##_.Suffix, "$src", "$src",
1274                         (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
1275                         Sched<[SchedRR]>;
1276}
1277
// Byte/word broadcast from a GPR. The instruction itself is defined with a
// GR32 source (there is no GR8/GR16 encoding), with no patterns attached;
// the explicit Pats below widen the GR8/GR16 value into a GR32 via
// INSERT_SUBREG before selecting the instruction. Only the low 8/16 bits are
// broadcast, so the undefined upper bits of the IMPLICIT_DEF are harmless.
1278multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1280                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1281  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283                        (outs _.RC:$dst), (ins GR32:$src),
1284                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287                        "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1288
  // Unmasked broadcast.
1289  def : Pat <(_.VT (OpNode SrcRC:$src)),
1290             (!cast<Instruction>(Name#r)
1291              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1292
  // Merge-masked broadcast.
1293  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1296
  // Zero-masked broadcast.
1297  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1300}
1301
// Vector-length expansion of avx512_int_broadcastbw_reg: 512-bit under the
// base predicate, 256/128-bit additionally gated on HasVLX. The instantiated
// name (Name#Z...) is passed down so the Pats can reference the instruction.
1302multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305  let Predicates = [prd] in
1306    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307              OpNode, SrcRC, Subreg>, EVEX_V512;
1308  let Predicates = [prd, HasVLX] in {
1309    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1313  }
1314}
1315
// Vector-length expansion of avx512_int_broadcast_reg (dword/qword GPR
// broadcasts): 512-bit under the base predicate, 256/128-bit under HasVLX.
1316multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317                                       SDPatternOperator OpNode,
1318                                       RegisterClass SrcRC, Predicate prd> {
1319  let Predicates = [prd] in
1320    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1321                                      SrcRC>, EVEX_V512;
1322  let Predicates = [prd, HasVLX] in {
1323    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1324                                         SrcRC>, EVEX_V256;
1325    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1326                                         SrcRC>, EVEX_V128;
1327  }
1328}
1329
// GPR-source broadcasts. Note VPBROADCASTDr and VPBROADCASTQr intentionally
// share opcode 0x7C; the qword form is distinguished by VEX_W (REX.W = 1).
// Byte/word forms require AVX512BW; they encode through a GR32 source (see
// avx512_int_broadcastbw_reg).
1330defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1334                       HasBWI>;
1335defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336                                                 X86VBroadcast, GR32, HasAVX512>;
1337defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1339
// Vector-length expansion for integer broadcasts with XMM-register or memory
// source (vpbroadcastb/w/d/q). 512-bit under prd, 256/128-bit under HasVLX.
1340multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1341                                        AVX512VLVectorVTInfo _, Predicate prd,
1342                                        bit IsConvertibleToThreeAddress> {
1343  let Predicates = [prd] in {
1344    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1345                                   WriteShuffle256Ld, _.info512, _.info128,
1346                                   IsConvertibleToThreeAddress>,
1347                                  EVEX_V512;
1348  }
1349  let Predicates = [prd, HasVLX] in {
1350    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1351                                    WriteShuffle256Ld, _.info256, _.info128,
1352                                    IsConvertibleToThreeAddress>,
1353                                 EVEX_V256;
1354    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1355                                    WriteShuffleXLd, _.info128, _.info128,
1356                                    IsConvertibleToThreeAddress>,
1357                                 EVEX_V128;
1358  }
1359}
1360
// XMM/memory-source integer broadcasts. Byte/word forms need AVX512BW and are
// not convertible to three-address form (last arg 0); dword/qword forms are.
1361defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1362                                           avx512vl_i8_info, HasBWI, 0>;
1363defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1364                                           avx512vl_i16_info, HasBWI, 0>;
1365defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1366                                           avx512vl_i32_info, HasAVX512, 1>;
1367defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1368                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1369
// Subvector broadcast from memory (vbroadcasti32x4 etc.): loads an _Src-sized
// vector and replicates it across the _Dst register. Memory-only; there is no
// register-source form of these instructions.
1370multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1371                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1372  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1373                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1374                           (_Dst.VT (X86SubVBroadcast
1375                             (_Src.VT (_Src.LdFrag addr:$src))))>,
1376                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1377                           AVX5128IBase, EVEX;
1378}
1379
1380// This should be used for the AVX512DQ broadcast instructions. It disables
1381// the unmasked patterns so that we only use the DQ instructions when masking
1382// is requested.
// AVX512_maskable_split takes a separate unmasked pattern (here null_frag) and
// masked pattern, so only the masked forms get isel patterns; the unmasked
// encoding is still emitted (hence hasSideEffects = 0, mayLoad = 1).
1383multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1384                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1385  let hasSideEffects = 0, mayLoad = 1 in
1386  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1388                           (null_frag),
1389                           (_Dst.VT (X86SubVBroadcast
1390                             (_Src.VT (_Src.LdFrag addr:$src))))>,
1391                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1392                           AVX5128IBase, EVEX;
1393}
1394
1395let Predicates = [HasAVX512] in {
1396  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  // Broadcasting the vzload result is equivalent to a plain qword broadcast
  // load: only lane 0 of the zero-extended value is replicated.
1397  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1398            (VPBROADCASTQZm addr:$src)>;
1399
1400  // FIXME this is to handle aligned extloads from i8.
1401  def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
1402            (VPBROADCASTDZm addr:$src)>;
1403}
1404
// 128/256-bit counterparts of the HasAVX512 broadcast-load patterns above.
1405let Predicates = [HasVLX] in {
1406  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1407  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1408            (VPBROADCASTQZ128m addr:$src)>;
1409  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1410            (VPBROADCASTQZ256m addr:$src)>;
1411
1412  // FIXME this is to handle aligned extloads from i8.
1413  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
1414            (VPBROADCASTDZ128m addr:$src)>;
1415  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
1416            (VPBROADCASTDZ256m addr:$src)>;
1417}
1418let Predicates = [HasVLX, HasBWI] in {
1419  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1420  // This means we'll encounter truncated i32 loads; match that here.
  // Narrowing the (any/zero-extended) i32 load to a word broadcast load is
  // safe: the broadcast only uses the low 16 bits of the loaded value.
1421  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1422            (VPBROADCASTWZ128m addr:$src)>;
1423  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1424            (VPBROADCASTWZ256m addr:$src)>;
1425  def : Pat<(v8i16 (X86VBroadcast
1426              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1427            (VPBROADCASTWZ128m addr:$src)>;
1428  def : Pat<(v8i16 (X86VBroadcast
1429              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1430            (VPBROADCASTWZ128m addr:$src)>;
1431  def : Pat<(v16i16 (X86VBroadcast
1432              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1433            (VPBROADCASTWZ256m addr:$src)>;
1434  def : Pat<(v16i16 (X86VBroadcast
1435              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1436            (VPBROADCASTWZ256m addr:$src)>;
1437
1438  // FIXME this is to handle aligned extloads from i8.
1439  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
1440            (VPBROADCASTWZ128m addr:$src)>;
1441  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
1442            (VPBROADCASTWZ256m addr:$src)>;
1443}
// 512-bit counterparts of the word broadcast-load patterns above.
1444let Predicates = [HasBWI] in {
1445  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1446  // This means we'll encounter truncated i32 loads; match that here.
1447  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1448            (VPBROADCASTWZm addr:$src)>;
1449  def : Pat<(v32i16 (X86VBroadcast
1450              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1451            (VPBROADCASTWZm addr:$src)>;
1452  def : Pat<(v32i16 (X86VBroadcast
1453              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1454            (VPBROADCASTWZm addr:$src)>;
1455
1456  // FIXME this is to handle aligned extloads from i8.
1457  def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
1458            (VPBROADCASTWZm addr:$src)>;
1459}
1460
1461//===----------------------------------------------------------------------===//
1462// AVX-512 BROADCAST SUBVECTORS
1463//
// 512-bit subvector broadcasts available in base AVX-512: 128-bit (x4 of
// dword / implicit for the 32-bit forms) and 256-bit (x4 of qword) memory
// sources. The 64-bit-element forms carry VEX_W.
1465defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1466                       v16i32_info, v4i32x_info>,
1467                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1468defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1469                       v16f32_info, v4f32x_info>,
1470                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1471defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1472                       v8i64_info, v4i64x_info>, VEX_W,
1473                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1474defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1475                       v8f64_info, v4f64x_info>, VEX_W,
1476                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1477
1478let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector broadcast loads of other element types reuse the
// 64x4 instruction: without masking the operation is element-size agnostic.
1479def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1480          (VBROADCASTF64X4rm addr:$src)>;
1481def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1482          (VBROADCASTI64X4rm addr:$src)>;
1483def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1484          (VBROADCASTI64X4rm addr:$src)>;
1485def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1486          (VBROADCASTI64X4rm addr:$src)>;
1487
1488// Provide fallback in case the load node that is used in the patterns above
1489// is used by additional users, which prevents the pattern selection.
// Register-source subvector broadcast is synthesized as insert-into-undef of
// the low half followed by VINSERT of the same value into the high half.
1490def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1491          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1492                           (v4f64 VR256X:$src), 1)>;
1493def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1494          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1495                           (v8f32 VR256X:$src), 1)>;
1496def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1497          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1498                           (v4i64 VR256X:$src), 1)>;
1499def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1500          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1501                           (v8i32 VR256X:$src), 1)>;
1502def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1503          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1504                           (v16i16 VR256X:$src), 1)>;
1505def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1506          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1507                           (v32i8 VR256X:$src), 1)>;
1508
// Unmasked 128-bit subvector broadcast loads of other element types reuse the
// 32x4 instruction, for the same reason as above.
1509def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1510          (VBROADCASTF32X4rm addr:$src)>;
1511def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1512          (VBROADCASTI32X4rm addr:$src)>;
1513def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1514          (VBROADCASTI32X4rm addr:$src)>;
1515def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1516          (VBROADCASTI32X4rm addr:$src)>;
1517
1518// Patterns for selects of bitcasted operations.
// The mask element type must match the instruction's element size, so these
// match the broadcast through a bitconvert to the 32/64-bit element type.
1519def : Pat<(vselect VK16WM:$mask,
1520                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1521                   (v16f32 immAllZerosV)),
1522          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1523def : Pat<(vselect VK16WM:$mask,
1524                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1525                   VR512:$src0),
1526          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1527def : Pat<(vselect VK16WM:$mask,
1528                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1529                   (v16i32 immAllZerosV)),
1530          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1531def : Pat<(vselect VK16WM:$mask,
1532                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1533                   VR512:$src0),
1534          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1535
1536def : Pat<(vselect VK8WM:$mask,
1537                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1538                   (v8f64 immAllZerosV)),
1539          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1540def : Pat<(vselect VK8WM:$mask,
1541                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1542                   VR512:$src0),
1543          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1544def : Pat<(vselect VK8WM:$mask,
1545                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1546                   (v8i64 immAllZerosV)),
1547          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1548def : Pat<(vselect VK8WM:$mask,
1549                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1550                   VR512:$src0),
1551          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1552}
1553
// 256-bit destination subvector broadcasts and their patterns (mirrors the
// HasAVX512 block above at YMM width).
1554let Predicates = [HasVLX] in {
1555defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1556                           v8i32x_info, v4i32x_info>,
1557                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1558defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1559                           v8f32x_info, v4f32x_info>,
1560                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1561
// Unmasked loads of other element types reuse the 32x4 instruction.
1562def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1563          (VBROADCASTF32X4Z256rm addr:$src)>;
1564def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1565          (VBROADCASTI32X4Z256rm addr:$src)>;
1566def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1567          (VBROADCASTI32X4Z256rm addr:$src)>;
1568def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1569          (VBROADCASTI32X4Z256rm addr:$src)>;
1570
1571// Patterns for selects of bitcasted operations.
1572def : Pat<(vselect VK8WM:$mask,
1573                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1574                   (v8f32 immAllZerosV)),
1575          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1576def : Pat<(vselect VK8WM:$mask,
1577                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1578                   VR256X:$src0),
1579          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1580def : Pat<(vselect VK8WM:$mask,
1581                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1582                   (v8i32 immAllZerosV)),
1583          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1584def : Pat<(vselect VK8WM:$mask,
1585                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1586                   VR256X:$src0),
1587          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1588
1589
1590// Provide fallback in case the load node that is used in the patterns above
1591// is used by additional users, which prevents the pattern selection.
1592def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1593          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1594                              (v2f64 VR128X:$src), 1)>;
1595def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1596          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1597                              (v4f32 VR128X:$src), 1)>;
1598def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1599          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1600                              (v2i64 VR128X:$src), 1)>;
1601def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1602          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1603                              (v4i32 VR128X:$src), 1)>;
1604def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1605          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1606                              (v8i16 VR128X:$src), 1)>;
1607def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1608          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1609                              (v16i8 VR128X:$src), 1)>;
1610}
1611
// AVX512DQ 64x2 subvector broadcasts into a 256-bit destination.
// NOTE: despite the "Z128" in their names, these defs are the 256-bit
// (EVEX_V256) variants. Only masked patterns exist (rm_dq passes null_frag
// for the unmasked pattern).
1612let Predicates = [HasVLX, HasDQI] in {
1613defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1614                           v4i64x_info, v2i64x_info>, VEX_W1X,
1615                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1616defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1617                           v4f64x_info, v2f64x_info>, VEX_W1X,
1618                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1619
1620// Patterns for selects of bitcasted operations.
1621def : Pat<(vselect VK4WM:$mask,
1622                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1623                   (v4f64 immAllZerosV)),
1624          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1625def : Pat<(vselect VK4WM:$mask,
1626                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1627                   VR256X:$src0),
1628          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1629def : Pat<(vselect VK4WM:$mask,
1630                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1631                   (v4i64 immAllZerosV)),
1632          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1633def : Pat<(vselect VK4WM:$mask,
1634                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1635                   VR256X:$src0),
1636          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1637}
1638
// AVX512DQ 512-bit subvector broadcasts: 64x2 and 32x8 forms. Unmasked
// patterns are disabled (rm_dq), so only the masked select patterns below
// use these instructions.
1639let Predicates = [HasDQI] in {
1640defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1641                       v8i64_info, v2i64x_info>, VEX_W,
1642                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1643defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1644                       v16i32_info, v8i32x_info>,
1645                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1646defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1647                       v8f64_info, v2f64x_info>, VEX_W,
1648                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1649defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1650                       v16f32_info, v8f32x_info>,
1651                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1652
1653// Patterns for selects of bitcasted operations.
1654def : Pat<(vselect VK16WM:$mask,
1655                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1656                   (v16f32 immAllZerosV)),
1657          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1658def : Pat<(vselect VK16WM:$mask,
1659                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1660                   VR512:$src0),
1661          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1662def : Pat<(vselect VK16WM:$mask,
1663                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1664                   (v16i32 immAllZerosV)),
1665          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1666def : Pat<(vselect VK16WM:$mask,
1667                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1668                   VR512:$src0),
1669          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1670
1671def : Pat<(vselect VK8WM:$mask,
1672                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1673                   (v8f64 immAllZerosV)),
1674          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1675def : Pat<(vselect VK8WM:$mask,
1676                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1677                   VR512:$src0),
1678          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1679def : Pat<(vselect VK8WM:$mask,
1680                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1681                   (v8i64 immAllZerosV)),
1682          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1683def : Pat<(vselect VK8WM:$mask,
1684                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1685                   VR512:$src0),
1686          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1687}
1688
// AVX512DQ 32x2 broadcasts (512/256-bit destinations). Uses the split
// multiclass because the source has 64-bit elements while the destination
// mask uses 32-bit elements; both unmasked ops are null_frag, so only the
// masked forms get isel patterns.
1689multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1690                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1691  let Predicates = [HasDQI] in
1692    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1693                                          WriteShuffle256Ld, _Dst.info512,
1694                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1695                                          EVEX_V512;
1696  let Predicates = [HasDQI, HasVLX] in
1697    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1698                                          WriteShuffle256Ld, _Dst.info256,
1699                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1700                                          EVEX_V256;
1701}
1702
// Integer-only extension of the 32x2 broadcast: adds the 128-bit (Z128)
// variant on top of the Z/Z256 ones from the base multiclass.
1703multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1704                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1705  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1706
1707  let Predicates = [HasDQI, HasVLX] in
1708    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1709                                          WriteShuffleXLd, _Dst.info128,
1710                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1711                                          EVEX_V128;
1712}
1713
// VBROADCASTI32X2 / VBROADCASTF32X2.  Note only the integer form gets a
// 128-bit variant (via the _i32x2 multiclass).
1714defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1715                                          avx512vl_i32_info, avx512vl_i64_info>;
1716defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1717                                          avx512vl_f32_info, avx512vl_f64_info>;
1718
1719//===----------------------------------------------------------------------===//
1720// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1721//---
// Single register-to-register instruction that broadcasts a mask register
// (KRC) into a vector register via the X86VBroadcastm node (VPBROADCASTM*).
// There is no memory form.
1722multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1723                                  X86VectorVTInfo _, RegisterClass KRC> {
1724  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1725                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1726                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1727                  EVEX, Sched<[WriteShuffle]>;
1728}
1729
// Instantiate avx512_mask_broadcastm at all three vector widths: CDI gates
// the 512-bit form, CDI+VLX gate the 256- and 128-bit forms.
1730multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1731                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1732  let Predicates = [HasCDI] in
1733    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1734  let Predicates = [HasCDI, HasVLX] in {
1735    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1736    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1737  }
1738}
1739
// VPBROADCASTMW2D: k-reg (16-bit mask) broadcast into i32 elements.
// VPBROADCASTMB2Q: k-reg (8-bit mask) broadcast into i64 elements (VEX_W).
1740defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1741                                               avx512vl_i32_info, VK16>;
1742defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1743                                               avx512vl_i64_info, VK8>, VEX_W;
1744
1745//===----------------------------------------------------------------------===//
1746// -- VPERMI2 - 3 source operands form --
// VPERMI2* reg/reg and reg/mem forms.  $src1 is the index vector and is tied
// to $dst ("$src1 = $dst"); its type IdxVT may differ in element type from
// the data type _, which is why the *_cast maskable helper is used.  Pattern
// matches X86VPermt2 with the index operand in the middle.
1747multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1748                         X86FoldableSchedWrite sched,
1749                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1750let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1751    hasSideEffects = 0 in {
1752  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1753          (ins _.RC:$src2, _.RC:$src3),
1754          OpcodeStr, "$src3, $src2", "$src2, $src3",
1755          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1756          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1757
1758  let mayLoad = 1 in
1759  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1760            (ins _.RC:$src2, _.MemOp:$src3),
1761            OpcodeStr, "$src3, $src2", "$src2, $src3",
1762            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1763                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1764            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1765  }
1766}
1767
// Embedded-broadcast ("rmb") memory form of VPERMI2*: the scalar memory
// operand is splatted across the vector (EVEX_B) before the permute.
1768multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1769                            X86FoldableSchedWrite sched,
1770                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772      hasSideEffects = 0, mayLoad = 1 in
1773  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1774              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1775              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1776              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1777              (_.VT (X86VPermt2 _.RC:$src2,
1778               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1779              AVX5128IBase, EVEX_4V, EVEX_B,
1780              Sched<[sched.Folded, sched.ReadAfterFold]>;
1781}
1782
// Instantiate VPERMI2 (plus its broadcast form) at 512/256/128 bits for
// dword/qword element types; the sub-512-bit forms require VLX.
1783multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1784                               X86FoldableSchedWrite sched,
1785                               AVX512VLVectorVTInfo VTInfo,
1786                               AVX512VLVectorVTInfo ShuffleMask> {
1787  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1788                           ShuffleMask.info512>,
1789            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1790                             ShuffleMask.info512>, EVEX_V512;
1791  let Predicates = [HasVLX] in {
1792  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793                               ShuffleMask.info128>,
1794                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1795                                  ShuffleMask.info128>, EVEX_V128;
1796  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1797                               ShuffleMask.info256>,
1798                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1799                                  ShuffleMask.info256>, EVEX_V256;
1800  }
1801}
1802
// Byte/word-element variant of avx512_perm_i_sizes, gated on an extra
// predicate (BWI or VBMI).  Deliberately omits the _mb broadcast forms:
// AVX-512 has no 8/16-bit embedded broadcast.
1803multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1804                                  X86FoldableSchedWrite sched,
1805                                  AVX512VLVectorVTInfo VTInfo,
1806                                  AVX512VLVectorVTInfo Idx,
1807                                  Predicate Prd> {
1808  let Predicates = [Prd] in
1809  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1810                           Idx.info512>, EVEX_V512;
1811  let Predicates = [Prd, HasVLX] in {
1812  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1813                               Idx.info128>, EVEX_V128;
1814  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1815                               Idx.info256>,  EVEX_V256;
1816  }
1817}
1818
// VPERMI2 instantiations.  The fp variants (PS/PD) share opcode 0x77 and use
// integer index-vector infos; D/Q share 0x76, W/B share 0x75.
1819defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1820                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1821defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1822                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1823defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1824                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1825                  VEX_W, EVEX_CD8<16, CD8VF>;
1826defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1827                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1828                  EVEX_CD8<8, CD8VF>;
1829defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1830                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1831defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1832                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1833
1834// Extra patterns to deal with extra bitcasts due to passthru and index being
1835// different types on the fp versions.
// Masked-VPERMI2 selection patterns where the tied index/passthru operand
// $src1 arrives bitcast from a different type (CastVT).  One pattern each for
// the register (rrk), load (rmk), and broadcast-load (rmbk) forms; all select
// to the masked instruction with $src1 as passthru.
1836multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1837                                  X86VectorVTInfo IdxVT,
1838                                  X86VectorVTInfo CastVT> {
1839  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1840                             (X86VPermt2 (_.VT _.RC:$src2),
1841                                         (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1842                             (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1843            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1844                                                _.RC:$src2, _.RC:$src3)>;
1845  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1846                             (X86VPermt2 _.RC:$src2,
1847                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1848                                         (_.LdFrag addr:$src3)),
1849                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1850            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1851                                                _.RC:$src2, addr:$src3)>;
1852  def : Pat<(_.KVT (vselect _.KRCWM:$mask,
1853                             (X86VPermt2 _.RC:$src2,
1854                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1855                                         (_.BroadcastLdFrag addr:$src3)),
1856                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1857            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1858                                                 _.RC:$src2, addr:$src3)>;
1859}
1860
1861// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Concrete instantiations for the fp VPERMI2PS forms, casting from the
// equally-sized vXi64 types that the ABI commonly produces.
1862defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1863defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1864defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1865
1866// VPERMT2
// VPERMT2* reg/reg and reg/mem forms.  Unlike VPERMI2, here the tied $src1
// operand is a data (table) operand and $src2 carries the indices.
1867multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1868                         X86FoldableSchedWrite sched,
1869                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1870let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1871  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1872          (ins IdxVT.RC:$src2, _.RC:$src3),
1873          OpcodeStr, "$src3, $src2", "$src2, $src3",
1874          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1875          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1876
1877  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1878            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1879            OpcodeStr, "$src3, $src2", "$src2, $src3",
1880            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1881                   (_.LdFrag addr:$src3))), 1>,
1882            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1883  }
1884}
// Embedded-broadcast ("rmb") memory form of VPERMT2* (EVEX_B): the scalar
// memory operand is splatted across the vector before the permute.
1885multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1886                            X86FoldableSchedWrite sched,
1887                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1888  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1889  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1890              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1891              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1892              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1893              (_.VT (X86VPermt2 _.RC:$src1,
1894               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1895              AVX5128IBase, EVEX_4V, EVEX_B,
1896              Sched<[sched.Folded, sched.ReadAfterFold]>;
1897}
1898
// Instantiate VPERMT2 (plus its broadcast form) at 512/256/128 bits; the
// sub-512-bit forms require VLX.  Mirrors avx512_perm_i_sizes.
1899multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1900                               X86FoldableSchedWrite sched,
1901                               AVX512VLVectorVTInfo VTInfo,
1902                               AVX512VLVectorVTInfo ShuffleMask> {
1903  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1904                              ShuffleMask.info512>,
1905            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1906                              ShuffleMask.info512>, EVEX_V512;
1907  let Predicates = [HasVLX] in {
1908  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909                              ShuffleMask.info128>,
1910                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1911                              ShuffleMask.info128>, EVEX_V128;
1912  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913                              ShuffleMask.info256>,
1914                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1915                              ShuffleMask.info256>, EVEX_V256;
1916  }
1917}
1918
// Byte/word-element variant of avx512_perm_t_sizes, gated on an extra
// predicate (BWI or VBMI); no _mb forms since AVX-512 has no 8/16-bit
// embedded broadcast.  Mirrors avx512_perm_i_sizes_bw.
1919multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1920                                  X86FoldableSchedWrite sched,
1921                                  AVX512VLVectorVTInfo VTInfo,
1922                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1923  let Predicates = [Prd] in
1924  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1925                           Idx.info512>, EVEX_V512;
1926  let Predicates = [Prd, HasVLX] in {
1927  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1928                               Idx.info128>, EVEX_V128;
1929  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1930                               Idx.info256>, EVEX_V256;
1931  }
1932}
1933
// VPERMT2 instantiations, parallel to the VPERMI2 set above: D/Q share 0x7E,
// W/B share 0x7D, and the fp PS/PD variants share 0x7F with integer index
// infos.
1934defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1935                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1936defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1937                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1938defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1939                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1940                  VEX_W, EVEX_CD8<16, CD8VF>;
1941defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1942                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1943                  EVEX_CD8<8, CD8VF>;
1944defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1945                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1946defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1947                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1948
1949//===----------------------------------------------------------------------===//
1950// AVX-512 - BLEND using mask
1951//
1952
// Mask-driven blend instructions (VBLENDM*/VPBLENDM*): plain, masked (rrk/
// rmk), and zero-masked (rrkz/rmkz) register and memory forms.  All have
// empty patterns ([]); selection is done elsewhere, so hasSideEffects = 0 and
// mayLoad are set explicitly.
1953multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1954                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1955  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1956  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1957             (ins _.RC:$src1, _.RC:$src2),
1958             !strconcat(OpcodeStr,
1959             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1960             EVEX_4V, Sched<[sched]>;
1961  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1962             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1963             !strconcat(OpcodeStr,
1964             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1966  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1967             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1968             !strconcat(OpcodeStr,
1969             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1970             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1971  let mayLoad = 1 in {
1972  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1973             (ins _.RC:$src1, _.MemOp:$src2),
1974             !strconcat(OpcodeStr,
1975             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1976             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1977             Sched<[sched.Folded, sched.ReadAfterFold]>;
1978  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1979             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1980             !strconcat(OpcodeStr,
1981             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1982             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1983             Sched<[sched.Folded, sched.ReadAfterFold]>;
1984  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1985             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1986             !strconcat(OpcodeStr,
1987             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1988             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1989             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1990  }
1991  }
1992}
// Embedded-broadcast (EVEX_B) memory forms of the mask blends: masked (rmbk),
// zero-masked (rmbkz), and unmasked (rmb).  Patterns are empty, as above.
1993multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1994                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1995  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1996  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1998       !strconcat(OpcodeStr,
1999            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2000            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2001      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2002      Sched<[sched.Folded, sched.ReadAfterFold]>;
2003
2004  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2006       !strconcat(OpcodeStr,
2007            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2008            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2009      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2010      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2011
2012  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2013      (ins _.RC:$src1, _.ScalarMemOp:$src2),
2014       !strconcat(OpcodeStr,
2015            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2016            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2017      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2018      Sched<[sched.Folded, sched.ReadAfterFold]>;
2019  }
2020}
2021
// Dword/qword (and fp) blend instantiation at all widths, including the
// broadcast forms; 256/128-bit forms require VLX.
2022multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2023                        AVX512VLVectorVTInfo VTInfo> {
2024  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2026                                 EVEX_V512;
2027
2028  let Predicates = [HasVLX] in {
2029    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2030                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2031                                      EVEX_V256;
2032    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2033                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2034                                      EVEX_V128;
2035  }
2036}
2037
// Byte/word blend instantiation, gated on BWI (plus VLX for sub-512-bit).
// No _rmb forms: AVX-512 has no 8/16-bit embedded broadcast.
2038multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2039                        AVX512VLVectorVTInfo VTInfo> {
2040  let Predicates = [HasBWI] in
2041    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2042                               EVEX_V512;
2043
2044  let Predicates = [HasBWI, HasVLX] in {
2045    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2046                                  EVEX_V256;
2047    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2048                                  EVEX_V128;
2049  }
2050}
2051
// Blend instantiations: fp and d/q integer blends via blendmask_dq,
// byte/word integer blends via blendmask_bw; 64/16-bit variants add VEX_W.
2052defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2053                              avx512vl_f32_info>;
2054defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2055                              avx512vl_f64_info>, VEX_W;
2056defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2057                              avx512vl_i32_info>;
2058defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2059                              avx512vl_i64_info>, VEX_W;
2060defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2061                              avx512vl_i8_info>;
2062defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2063                              avx512vl_i16_info>, VEX_W;
2064
2065//===----------------------------------------------------------------------===//
2066// Compare Instructions
2067//===----------------------------------------------------------------------===//
2068
2069// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2070
// AVX-512 scalar compares (VCMPSS/VCMPSD) producing a mask register.
// Intrinsic forms: rr_Int/rm_Int (register/memory) and rrb_Int ({sae}
// suppress-all-exceptions, register only).  The _su PatFrag variants are used
// for the masked patterns inside AVX512_maskable_cmp (single-use check).
// The isCodeGenOnly rr/rm forms operate on scalar FRC registers for
// non-intrinsic selection.
2071multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2072                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2073                             X86FoldableSchedWrite sched> {
2074  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2075                      (outs _.KRC:$dst),
2076                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2077                      "vcmp"#_.Suffix,
2078                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2079                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2080                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2081                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
2082  let mayLoad = 1 in
2083  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2084                    (outs _.KRC:$dst),
2085                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2086                    "vcmp"#_.Suffix,
2087                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2088                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2089                        timm:$cc),
2090                    (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2091                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2092                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2093
2094  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2095                     (outs _.KRC:$dst),
2096                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2097                     "vcmp"#_.Suffix,
2098                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2099                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2100                                timm:$cc),
2101                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2102                                   timm:$cc)>,
2103                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2104
2105  let isCodeGenOnly = 1 in {
2106    let isCommutable = 1 in
2107    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2108                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2109                !strconcat("vcmp", _.Suffix,
2110                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2111                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2112                                          _.FRC:$src2,
2113                                          timm:$cc))]>,
2114                EVEX_4V, VEX_LIG, Sched<[sched]>;
2115    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2116              (outs _.KRC:$dst),
2117              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2118              !strconcat("vcmp", _.Suffix,
2119                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2120              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2121                                        (_.ScalarLdFrag addr:$src2),
2122                                        timm:$cc))]>,
2123              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2124              Sched<[sched.Folded, sched.ReadAfterFold]>;
2125  }
2126}
2127
// Single-use ("_su") wrappers around the scalar compare nodes: only match
// when the compare result has one use, so the masked forms don't duplicate a
// compare that is needed elsewhere.
2128def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2129                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
2130  return N->hasOneUse();
2131}]>;
2132def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2133                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2134  return N->hasOneUse();
2135}]>;
2136
// VCMPSS (f32, XS prefix) and VCMPSD (f64, XD prefix + VEX_W) EVEX-encoded
// scalar compares, available with base AVX512F.
2137let Predicates = [HasAVX512] in {
2138  let ExeDomain = SSEPackedSingle in
2139  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2140                                   X86cmpms_su, X86cmpmsSAE_su,
2141                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2142  let ExeDomain = SSEPackedDouble in
2143  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2144                                   X86cmpms_su, X86cmpmsSAE_su,
2145                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2146}
2147
// Packed integer compares to mask (VPCMPEQ*/VPCMPGT*): plain and masked
// (rrk/rmk) register and memory forms.  Patterns are empty; selection is done
// via separate patterns, so hasSideEffects = 0 and mayLoad are explicit.
2148multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2149                              X86FoldableSchedWrite sched,
2150                              X86VectorVTInfo _, bit IsCommutable> {
2151  let isCommutable = IsCommutable, hasSideEffects = 0 in
2152  def rr : AVX512BI<opc, MRMSrcReg,
2153             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2154             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2155             []>, EVEX_4V, Sched<[sched]>;
2156  let mayLoad = 1, hasSideEffects = 0 in
2157  def rm : AVX512BI<opc, MRMSrcMem,
2158             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2159             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2160             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2161  let isCommutable = IsCommutable, hasSideEffects = 0 in
2162  def rrk : AVX512BI<opc, MRMSrcReg,
2163              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2164              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2165                          "$dst {${mask}}, $src1, $src2}"),
2166              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2167  let mayLoad = 1, hasSideEffects = 0 in
2168  def rmk : AVX512BI<opc, MRMSrcMem,
2169              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2170              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2171                          "$dst {${mask}}, $src1, $src2}"),
2172              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2173}
2174
// Packed integer compare with embedded-broadcast (EVEX_B) memory forms
// (rmb/rmbk) layered on top of avx512_icmp_packed; used by the 32/64-bit
// element compares (8/16-bit elements have no embedded broadcast).
2175multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2176                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2177                                  bit IsCommutable> :
2178           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2179  let mayLoad = 1, hasSideEffects = 0 in {
2180  def rmb : AVX512BI<opc, MRMSrcMem,
2181              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2182              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2183                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2184              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2185  def rmbk : AVX512BI<opc, MRMSrcMem,
2186               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2187                                       _.ScalarMemOp:$src2),
2188               !strconcat(OpcodeStr,
2189                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2190                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2191               []>, EVEX_4V, EVEX_K, EVEX_B,
2192               Sched<[sched.Folded, sched.ReadAfterFold]>;
2193  }
2194}
2195
// Instantiate avx512_icmp_packed at 512/256/128 bits under predicate prd
// (plus VLX for the sub-512-bit forms).
2196multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2197                                 X86SchedWriteWidths sched,
2198                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2199                                 bit IsCommutable = 0> {
2200  let Predicates = [prd] in
2201  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2202                              VTInfo.info512, IsCommutable>, EVEX_V512;
2203
2204  let Predicates = [prd, HasVLX] in {
2205    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2206                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2207    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2208                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2209  }
2210}
2211
// Same as avx512_icmp_packed_vl but using the _rmb variant, so broadcast
// memory forms are included (32/64-bit element compares).
2212multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2213                                     X86SchedWriteWidths sched,
2214                                     AVX512VLVectorVTInfo VTInfo,
2215                                     Predicate prd, bit IsCommutable = 0> {
2216  let Predicates = [prd] in
2217  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2218                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2219
2220  let Predicates = [prd, HasVLX] in {
2221    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2222                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2223    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2224                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2225  }
2226}
2227
2228// This fragment treats X86cmpm as commutable to help match loads in both
2229// operands for PCMPEQ.
// X86setcc_commute: SETCC marked SDNPCommutative so loads match in either
// operand.  X86pcmpgtm: plain setcc restricted to the SETGT condition.
2230def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2231def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2232                         (setcc node:$src1, node:$src2, SETGT)>;
2233
2234// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2235// increase the pattern complexity the way an immediate would.
// VPCMPEQ*/VPCMPGT* instantiations.  EQ forms are commutable (IsCommutable =
// 1); GT forms are not.  D/Q element sizes use the _rmb_vl multiclass for
// embedded-broadcast forms, B/W use the plain _vl multiclass under HasBWI.
2236let AddedComplexity = 2 in {
2237// FIXME: Is there a better scheduler class for VPCMP?
2238defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2239                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2240                EVEX_CD8<8, CD8VF>, VEX_WIG;
2241
2242defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2243                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2244                EVEX_CD8<16, CD8VF>, VEX_WIG;
2245
2246defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2247                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2248                EVEX_CD8<32, CD8VF>;
2249
2250defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2251                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2252                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2253
2254defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2255                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2256                EVEX_CD8<8, CD8VF>, VEX_WIG;
2257
2258defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2259                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2260                EVEX_CD8<16, CD8VF>, VEX_WIG;
2261
2262defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2263                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2264                EVEX_CD8<32, CD8VF>;
2265
2266defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2267                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2268                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2269}
2270
// Packed integer compare with an explicit condition-code immediate
// (VPCMP{B,W,D,Q} / VPCMPU{B,W,D,Q}).  Defines the four basic forms:
//   rri  - register/register
//   rmi  - register/memory (load folded as operand 2)
//   rrik - register/register, result ANDed with a writemask
//   rmik - register/memory, result ANDed with a writemask
// Frag/Frag_su match the plain and single-use setcc fragments;
// CommFrag/CommFrag_su match the operand-swapped setcc so a load appearing
// as operand 1 can still be folded, with the condition-code immediate
// rewritten through the fragment's OperandTransform.
2271multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2272                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2273                          X86FoldableSchedWrite sched,
2274                          X86VectorVTInfo _, string Name> {
2275  let isCommutable = 1 in
2276  def rri : AVX512AIi8<opc, MRMSrcReg,
2277             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2278             !strconcat("vpcmp", Suffix,
2279                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2280             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2281                                                (_.VT _.RC:$src2),
2282                                                cond)))]>,
2283             EVEX_4V, Sched<[sched]>;
2284  def rmi : AVX512AIi8<opc, MRMSrcMem,
2285             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2286             !strconcat("vpcmp", Suffix,
2287                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2288             [(set _.KRC:$dst, (_.KVT
2289                                (Frag:$cc
2290                                 (_.VT _.RC:$src1),
2291                                 (_.VT (_.LdFrag addr:$src2)),
2292                                 cond)))]>,
2293             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2294  let isCommutable = 1 in
2295  def rrik : AVX512AIi8<opc, MRMSrcReg,
2296              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2297                                      u8imm:$cc),
2298              !strconcat("vpcmp", Suffix,
2299                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2300                         "$dst {${mask}}, $src1, $src2, $cc}"),
2301              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2302                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2303                                                         (_.VT _.RC:$src2),
2304                                                         cond))))]>,
2305              EVEX_4V, EVEX_K, Sched<[sched]>;
2306  def rmik : AVX512AIi8<opc, MRMSrcMem,
2307              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2308                                    u8imm:$cc),
2309              !strconcat("vpcmp", Suffix,
2310                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2311                         "$dst {${mask}}, $src1, $src2, $cc}"),
2312              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2313                                     (_.KVT
2314                                      (Frag_su:$cc
2315                                       (_.VT _.RC:$src1),
2316                                       (_.VT (_.LdFrag addr:$src2)),
2317                                       cond))))]>,
2318              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2319
  // Commuted load-folding patterns: when the load is operand 1, select the
  // rmi/rmik form and swap the immediate via OperandTransform.
2320  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2321                                 (_.VT _.RC:$src1), cond)),
2322            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2323             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2324
2325  def : Pat<(and _.KRCWM:$mask,
2326                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2327                                      (_.VT _.RC:$src1), cond))),
2328            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2329             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             // Use CommFrag_su's transform to match the fragment actually
             // matched above (and to mirror avx512_icmp_cc_rmb).  Every
             // instantiation in this file gives CommFrag and CommFrag_su the
             // same OperandTransform, so emitted patterns are unchanged.
2330             (CommFrag_su.OperandTransform $cc))>;
2331}
2332
// Extends avx512_icmp_cc with embedded-broadcast memory forms for element
// sizes that support EVEX.b (dword/qword):
//   rmib  - register vs broadcast-from-scalar-memory
//   rmibk - same, result ANDed with a writemask
// The trailing Pats handle the commuted case where the broadcast load is
// operand 1, swapping the condition-code immediate via OperandTransform.
2333multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2334                              PatFrag Frag_su, PatFrag CommFrag,
2335                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2336                              X86VectorVTInfo _, string Name> :
2337           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2338                          sched, _, Name> {
2339  def rmib : AVX512AIi8<opc, MRMSrcMem,
2340             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2341                                     u8imm:$cc),
2342             !strconcat("vpcmp", Suffix,
2343                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2344                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2345             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2346                                       (_.VT _.RC:$src1),
2347                                       (_.BroadcastLdFrag addr:$src2),
2348                                       cond)))]>,
2349             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2350  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2351              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2352                                       _.ScalarMemOp:$src2, u8imm:$cc),
2353              !strconcat("vpcmp", Suffix,
2354                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2355                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2356              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2357                                     (_.KVT (Frag_su:$cc
2358                                             (_.VT _.RC:$src1),
2359                                             (_.BroadcastLdFrag addr:$src2),
2360                                             cond))))]>,
2361              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2362
  // Commuted broadcast-folding patterns (broadcast load as operand 1).
2363  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
2364                    (_.VT _.RC:$src1), cond)),
2365            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2366             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2367
2368  def : Pat<(and _.KRCWM:$mask,
2369                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
2370                                      (_.VT _.RC:$src1), cond))),
2371            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2372             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2373             (CommFrag_su.OperandTransform $cc))>;
2374}
2375
// Instantiates avx512_icmp_cc across the three vector lengths: the 512-bit
// form under the base predicate, the 256/128-bit forms additionally gated
// on HasVLX.
2376multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2377                             PatFrag Frag_su, PatFrag CommFrag,
2378                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
2379                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2380  let Predicates = [prd] in
2381  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2382                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2383
2384  let Predicates = [prd, HasVLX] in {
2385    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2386                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2387    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2388                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2389  }
2390}
2391
// Same vector-length expansion as avx512_icmp_cc_vl, but using the
// broadcast-capable avx512_icmp_cc_rmb base (dword/qword compares).
2392multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2393                                 PatFrag Frag_su, PatFrag CommFrag,
2394                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2395                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2396  let Predicates = [prd] in
2397  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2398                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2399
2400  let Predicates = [prd, HasVLX] in {
2401    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2402                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2403    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2404                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2405  }
2406}
2407
// Convert a setcc condition code into the VPCMP immediate encoding.
2408def X86pcmpm_imm : SDNodeXForm<setcc, [{
2409  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2410  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2411  return getI8Imm(SSECC, SDLoc(N));
2412}]>;
2413
2414// Swapped operand version of the above.
// Used when load folding forces the memory operand into the second slot,
// so the immediate must encode the reversed comparison.
2415def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2416  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2417  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2418  SSECC = X86::getSwappedVPCMPImm(SSECC);
2419  return getI8Imm(SSECC, SDLoc(N));
2420}]>;
2421
// setcc fragments feeding the VPCMP/VPCMPU patterns above.  Naming scheme:
//   X86pcmpm*  - signed condition codes   -> VPCMP{B,W,D,Q}
//   X86pcmpum* - unsigned condition codes -> VPCMPU{B,W,D,Q}
//   *_su       - additionally requires a single use, so folding under a
//                writemask AND cannot duplicate the compare
//   *_commute  - same match, but emits the swapped immediate
//                (X86pcmpm_imm_commute) for load folding in operand 1
2422def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2423                       (setcc node:$src1, node:$src2, node:$cc), [{
2424  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2425  return !ISD::isUnsignedIntSetCC(CC);
2426}], X86pcmpm_imm>;
2427
2428def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2429                          (setcc node:$src1, node:$src2, node:$cc), [{
2430  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2431  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2432}], X86pcmpm_imm>;
2433
2434// Same as above, but commutes immediate. Use for load folding.
2435def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2436                               (setcc node:$src1, node:$src2, node:$cc), [{
2437  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2438  return !ISD::isUnsignedIntSetCC(CC);
2439}], X86pcmpm_imm_commute>;
2440
2441def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2442                                  (setcc node:$src1, node:$src2, node:$cc), [{
2443  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2444  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2445}], X86pcmpm_imm_commute>;
2446
// Unsigned counterparts of the fragments above.
2447def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2448                        (setcc node:$src1, node:$src2, node:$cc), [{
2449  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2450  return ISD::isUnsignedIntSetCC(CC);
2451}], X86pcmpm_imm>;
2452
2453def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2454                           (setcc node:$src1, node:$src2, node:$cc), [{
2455  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2456  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2457}], X86pcmpm_imm>;
2458
2459// Same as above, but commutes immediate. Use for load folding.
2460def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2461                                (setcc node:$src1, node:$src2, node:$cc), [{
2462  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2463  return ISD::isUnsignedIntSetCC(CC);
2464}], X86pcmpm_imm_commute>;
2465
2466def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2467                                   (setcc node:$src1, node:$src2, node:$cc), [{
2468  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2469  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2470}], X86pcmpm_imm_commute>;
2471
2472// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Immediate-CC integer compares.  Opcode pairs: 0x3F (signed) / 0x3E
// (unsigned) for byte/word; 0x1F (signed) / 0x1E (unsigned) for dword/qword.
// Dword/qword use the broadcast-capable _rmb_vl multiclass; byte/word
// cannot broadcast and need HasBWI.
2473defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2474                                X86pcmpm_commute, X86pcmpm_commute_su,
2475                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2476                                EVEX_CD8<8, CD8VF>;
2477defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2478                                 X86pcmpum_commute, X86pcmpum_commute_su,
2479                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2480                                 EVEX_CD8<8, CD8VF>;
2481
2482defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2483                                X86pcmpm_commute, X86pcmpm_commute_su,
2484                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2485                                VEX_W, EVEX_CD8<16, CD8VF>;
2486defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2487                                 X86pcmpum_commute, X86pcmpum_commute_su,
2488                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2489                                 VEX_W, EVEX_CD8<16, CD8VF>;
2490
2491defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2492                                    X86pcmpm_commute, X86pcmpm_commute_su,
2493                                    SchedWriteVecALU, avx512vl_i32_info,
2494                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2495defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2496                                     X86pcmpum_commute, X86pcmpum_commute_su,
2497                                     SchedWriteVecALU, avx512vl_i32_info,
2498                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2499
2500defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2501                                    X86pcmpm_commute, X86pcmpm_commute_su,
2502                                    SchedWriteVecALU, avx512vl_i64_info,
2503                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2504defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2505                                     X86pcmpum_commute, X86pcmpum_commute_su,
2506                                     SchedWriteVecALU, avx512vl_i64_info,
2507                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2508
// Single-use-only variants of the FP compare nodes; required when the
// compare is folded under a writemask AND so it is not duplicated.
2509def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2510                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
2511  return N->hasOneUse();
2512}]>;
2513def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2514                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2515  return N->hasOneUse();
2516}]>;
2517
// Swap the operand order encoded in a VCMP immediate.  Only the low 5 bits
// are a valid AVX-512 predicate, hence the 0x1f mask before swapping.
2518def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2519  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2520  return getI8Imm(Imm, SDLoc(N));
2521}]>;
2522
// Packed FP compare-to-mask (VCMPPS/VCMPPD) at one vector width.  Emits
// rri (reg/reg), rmi (reg/mem) and rmbi (reg/broadcast) through
// AVX512_maskable_cmp, which also creates the writemasked variants from
// the X86cmpm_su pattern.
2523multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2524                              string Name> {
2525  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2526                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2527                   "vcmp"#_.Suffix,
2528                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2529                   (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2530                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2531                   1>, Sched<[sched]>;
2532
2533  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2534                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2535                "vcmp"#_.Suffix,
2536                "$cc, $src2, $src1", "$src1, $src2, $cc",
2537                (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2538                         timm:$cc),
2539                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2540                            timm:$cc)>,
2541                Sched<[sched.Folded, sched.ReadAfterFold]>;
2542
2543  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2544                (outs _.KRC:$dst),
2545                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2546                "vcmp"#_.Suffix,
2547                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2548                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2549                (X86cmpm (_.VT _.RC:$src1),
2550                        (_.VT (_.BroadcastLdFrag addr:$src2)),
2551                        timm:$cc),
2552                (X86cmpm_su (_.VT _.RC:$src1),
2553                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2554                            timm:$cc)>,
2555                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2556
2557  // Patterns for selecting with loads in other operand.
  // When the (broadcast) load is operand 1, still fold it by swapping the
  // predicate immediate with X86cmpm_imm_commute.
2558  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2559                     timm:$cc),
2560            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2561                                                      (X86cmpm_imm_commute timm:$cc))>;
2562
2563  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2564                                            (_.VT _.RC:$src1),
2565                                            timm:$cc)),
2566            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2567                                                       _.RC:$src1, addr:$src2,
2568                                                       (X86cmpm_imm_commute timm:$cc))>;
2569
2570  def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2),
2571                     (_.VT _.RC:$src1), timm:$cc),
2572            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2573                                                       (X86cmpm_imm_commute timm:$cc))>;
2574
2575  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2576                                            (_.VT _.RC:$src1),
2577                                            timm:$cc)),
2578            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2579                                                        _.RC:$src1, addr:$src2,
2580                                                        (X86cmpm_imm_commute timm:$cc))>;
2581}
2582
// Register-register FP compare with {sae} (suppress-all-exceptions).
// Only meaningful at 512-bit width, where EVEX.b selects SAE.
2583multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2584  // comparison code form (VCMP[EQ/LT/LE/...]
2585  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2586                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2587                     "vcmp"#_.Suffix,
2588                     "$cc, {sae}, $src2, $src1",
2589                     "$src1, $src2, {sae}, $cc",
2590                     (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2591                     (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2592                                    timm:$cc)>,
2593                     EVEX_B, Sched<[sched]>;
2594}
2595
// Vector-length expansion for VCMP: the 512-bit form also gets the SAE
// variant; 128/256-bit forms additionally require HasVLX.
2596multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2597  let Predicates = [HasAVX512] in {
2598    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2599                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2600
2601  }
2602  let Predicates = [HasAVX512,HasVLX] in {
2603   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2604   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2605  }
2606}
2607
// Packed FP compares writing a k-mask.
2608defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2609                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2610defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2611                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2612
2613// Patterns to select fp compares with load as first operand.
// Scalar VCMPSS/VCMPSD load folding with the operands reversed; the
// predicate immediate is swapped via X86cmpm_imm_commute.
2614let Predicates = [HasAVX512] in {
2615  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2616                            timm:$cc)),
2617            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2618
2619  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2620                            timm:$cc)),
2621            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2622}
2623
2624// ----------------------------------------------------------------
2625// FPClass
2626
// Single-use variants of the scalar/vector FPCLASS nodes, used when the
// result is consumed by a writemask AND so folding cannot duplicate work.
2627def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2628                              (X86Vfpclasss node:$src1, node:$src2), [{
2629  return N->hasOneUse();
2630}]>;
2631
2632def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2633                             (X86Vfpclass node:$src1, node:$src2), [{
2634  return N->hasOneUse();
2635}]>;
2636
2637//handle fpclass instruction  mask =  op(reg_scalar,imm)
2638//                                    op(mem_scalar,imm)
// Scalar VFPCLASSSS/VFPCLASSSD: classify one FP value against the classes
// selected by the 8-bit immediate and write a 1-bit mask.  Forms: rr/rrk
// (register) and rm/rmk (scalar memory), with and without a writemask.
2639multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2640                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2641                                 Predicate prd> {
2642  let Predicates = [prd], ExeDomain = _.ExeDomain in {
2643      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2644                      (ins _.RC:$src1, i32u8imm:$src2),
2645                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2646                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2647                              (i32 timm:$src2)))]>,
2648                      Sched<[sched]>;
2649      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2650                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2651                      OpcodeStr##_.Suffix#
2652                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2653                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2654                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2655                                      (i32 timm:$src2))))]>,
2656                      EVEX_K, Sched<[sched]>;
2657    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2658                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2659                    OpcodeStr##_.Suffix##
2660                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2661                    [(set _.KRC:$dst,
2662                          (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2663                                       (i32 timm:$src2)))]>,
2664                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2665    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2666                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2667                    OpcodeStr##_.Suffix##
2668                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2669                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2670                        (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2671                            (i32 timm:$src2))))]>,
2672                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2673  }
2674}
2675
2676//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2677//                                  fpclass(reg_vec, mem_vec, imm)
2678//                                  fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASSPS/VFPCLASSPD at one width: rr/rrk (register), rm/rmk
// (full-vector memory) and rmb/rmbk (embedded broadcast), each with an
// optional writemask.  'mem' is the x/y/z size suffix used in the AT&T
// memory-form mnemonic; the trailing InstAliases accept that suffix on the
// register and broadcast forms too, where the operands already determine
// the size.
2679multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2680                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2681                                 string mem>{
2682  let ExeDomain = _.ExeDomain in {
2683  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2684                      (ins _.RC:$src1, i32u8imm:$src2),
2685                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2686                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2687                                       (i32 timm:$src2)))]>,
2688                      Sched<[sched]>;
2689  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2690                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2691                      OpcodeStr##_.Suffix#
2692                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2693                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2694                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2695                                       (i32 timm:$src2))))]>,
2696                      EVEX_K, Sched<[sched]>;
2697  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2698                    (ins _.MemOp:$src1, i32u8imm:$src2),
2699                    OpcodeStr##_.Suffix#"{"#mem#"}"#
2700                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701                    [(set _.KRC:$dst,(X86Vfpclass
2702                                     (_.VT (_.LdFrag addr:$src1)),
2703                                     (i32 timm:$src2)))]>,
2704                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2705  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2706                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2707                    OpcodeStr##_.Suffix#"{"#mem#"}"#
2708                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2709                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2710                                  (_.VT (_.LdFrag addr:$src1)),
2711                                  (i32 timm:$src2))))]>,
2712                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2713  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2714                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2715                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2716                                      _.BroadcastStr##", $dst|$dst, ${src1}"
2717                                                  ##_.BroadcastStr##", $src2}",
2718                    [(set _.KRC:$dst,(X86Vfpclass
2719                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2720                                     (i32 timm:$src2)))]>,
2721                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2722  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2723                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2724                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2725                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2726                                                   _.BroadcastStr##", $src2}",
2727                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2728                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2729                                     (i32 timm:$src2))))]>,
2730                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2731  }
2732
2733  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2734  // the memory form.
2735  def : InstAlias<OpcodeStr#_.Suffix#mem#
2736                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2737                  (!cast<Instruction>(NAME#"rr")
2738                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2739  def : InstAlias<OpcodeStr#_.Suffix#mem#
2740                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2741                  (!cast<Instruction>(NAME#"rrk")
2742                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2743  def : InstAlias<OpcodeStr#_.Suffix#mem#
2744                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2745                  _.BroadcastStr#", $src2}",
2746                  (!cast<Instruction>(NAME#"rmb")
2747                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2748  def : InstAlias<OpcodeStr#_.Suffix#mem#
2749                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2750                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2751                  (!cast<Instruction>(NAME#"rmbk")
2752                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2753}
2754
// Vector-length expansion for VFPCLASS; the "z"/"x"/"y" strings become the
// AT&T memory-size suffix for each width.  128/256-bit need HasVLX.
2755multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2756                                     bits<8> opc, X86SchedWriteWidths sched,
2757                                     Predicate prd>{
2758  let Predicates = [prd] in {
2759    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2760                                      _.info512, "z">, EVEX_V512;
2761  }
2762  let Predicates = [prd, HasVLX] in {
2763    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2764                                      _.info128, "x">, EVEX_V128;
2765    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2766                                      _.info256, "y">, EVEX_V256;
2767  }
2768}
2769
// Instantiates both element sizes for the packed (PS/PD) and scalar
// (SS/SD) fpclass forms; opcVec/opcScalar are the two opcodes.
2770multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2771                                 bits<8> opcScalar, X86SchedWriteWidths sched,
2772                                 Predicate prd> {
2773  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2774                                      sched, prd>,
2775                                      EVEX_CD8<32, CD8VF>;
2776  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2777                                      sched, prd>,
2778                                      EVEX_CD8<64, CD8VF> , VEX_W;
2779  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2780                                   sched.Scl, f32x_info, prd>, VEX_LIG,
2781                                   EVEX_CD8<32, CD8VT1>;
2782  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2783                                   sched.Scl, f64x_info, prd>, VEX_LIG,
2784                                   EVEX_CD8<64, CD8VT1>, VEX_W;
2785}
2786
// VFPCLASS{PS,PD,SS,SD} (opcodes 0x66 vector / 0x67 scalar); requires DQI.
2787defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2788                                      HasDQI>, AVX512AIi8Base, EVEX;
2789
2790//-----------------------------------------------------------------
2791// Mask register copy, including
2792// - copy between mask registers
2793// - load/store mask registers
2794// - copy from GPR to mask register and vice versa
2795//
// KMOV k<->k / k<->mem forms for one mask width:
//   kk - mask-to-mask copy (no pattern; isMoveReg lets it be elided)
//   km - load mask from memory
//   mk - store mask to memory
2796multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2797                         string OpcodeStr, RegisterClass KRC,
2798                         ValueType vvt, X86MemOperand x86memop> {
2799  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2800  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2801             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2802             Sched<[WriteMove]>;
2803  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2804             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2805             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2806             Sched<[WriteLoad]>;
2807  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2808             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2809             [(store KRC:$src, addr:$dst)]>,
2810             Sched<[WriteStore]>;
2811}
2812
// KMOV GPR<->mask forms (kr: GPR to mask, rk: mask to GPR).  No selection
// patterns; these are emitted explicitly (see the bitconvert Pats below),
// hence hasSideEffects = 0 with empty pattern lists.
2813multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2814                             string OpcodeStr,
2815                             RegisterClass KRC, RegisterClass GRC> {
2816  let hasSideEffects = 0 in {
2817    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2818               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2819               Sched<[WriteMove]>;
2820    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2821               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2822               Sched<[WriteMove]>;
2823  }
2824}
2825
// KMOVB (DQI), KMOVW (AVX512F), KMOVD/KMOVQ (BWI).  Note the byte and word
// GPR forms move through GR32, not GR8/GR16.
2826let Predicates = [HasDQI] in
2827  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2828               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2829               VEX, PD;
2830
2831let Predicates = [HasAVX512] in
2832  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2833               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2834               VEX, PS;
2835
2836let Predicates = [HasBWI] in {
2837  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2838               VEX, PD, VEX_W;
  // The GPR forms of KMOVD/KMOVQ use different prefix encodings (XD) than
  // their k/mem forms, so they are instantiated separately.
2839  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2840               VEX, XD;
2841  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2842               VEX, PS, VEX_W;
2843  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2844               VEX, XD, VEX_W;
2845}
2846
2847// GR from/to mask register
// i8/i16 cases go through a GR32 super-register (INSERT_SUBREG /
// EXTRACT_SUBREG) because the KMOV GPR forms operate on 32-bit registers.
2848def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2849          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2850def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2851          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2852
2853def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2854          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2855def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2856          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2857
// zext uses KMOVWrk/KMOVBrk (which zero the upper GPR bits); anyext can
// use a plain cross-class copy since the upper bits are don't-care.
2858def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2859          (KMOVWrk VK16:$src)>;
2860def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2861          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2862def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2863          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2864def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2865          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2866
// KMOVBrk exists only with DQI; the anyext patterns below need no KMOV.
2867def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2868          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2869def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2870          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2871def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2872          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2873def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2874          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2875
// 32/64-bit masks are the same width as the GPR: a plain register-class
// change suffices in both directions.
2876def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2877          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2878def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2879          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2880def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2881          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2882def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2883          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2884
// Load/store kreg
let Predicates = [HasDQI] in {
  // Sub-byte masks are stored/loaded through the 8-bit KMOVB form; the
  // mask value is just reinterpreted in the wider/narrower register class.
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  // Without DQI there is no byte KMOV from memory, so an 8-bit mask load
  // goes through a zero-extending GPR load instead.
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}
2904
// SDNode matching an i8 extract of a single i1 element from a mask vector
// (result i8, operand 0 a vector of i1, operand 1 a pointer-sized index).
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;
2909
let Predicates = [HasAVX512] in {
  // Lower GPR<->mask scalar_to_vector / element-0 extract as plain
  // register-class copies (widening i8 through i32 where needed).
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Inserting a single GPR bit into an all-zero v16i1: mask the source down
  // to bit 0 with AND, then move it into a mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}
2940
// Mask unary operation
// - KNOT
// Defines the register-register form of a unary mask instruction and its
// selection pattern, gated on the given predicate.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}
2952
// Instantiates the B/W/D/Q width variants of a unary mask op, each with its
// own feature predicate and encoding prefix.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2968
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// v4i1/v2i1 have no native KNOT at any feature level, so they are always
// promoted through VK16 and narrowed back afterwards.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2978
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
// Defines the register-register form of a two-operand mask instruction and
// its selection pattern; IsCommutable feeds the machine-IR commutation flag.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched, Predicate prd,
                           bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}
2992
// Instantiates the B/W/D/Q variants of a binary mask op. prdW lets a caller
// tighten the predicate of the W form (e.g. KADD requires DQI, not AVX512F).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
3006
// Helper fragments for the compound mask ops (and-not, xnor).
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3020
// Promote narrow-mask binary ops to the 16-bit instruction 'Inst' by copying
// both operands into VK16, executing the W-form instruction, and copying the
// result back to the original (narrow) register class.
// VOpNode is the vector-capable fragment (e.g. vandn), OpNode the scalar one
// used for the single-bit v1i1 case.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // Bug fix: the result must be copied back to the register class matching
  // the pattern's result type (VK2/VK4), not VK1 as in the original
  // copy-paste of the v1i1 pattern above.
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
3045
// Instantiate the narrow-mask promotion patterns for each logical op,
// pairing the vector fragment, the scalar fragment, and the W-form
// instruction that implements both.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
3051
// Mask unpacking
// KUNPCK concatenates two source masks into one double-width mask; note the
// selection pattern swaps the operands relative to concat_vectors order.
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3071
// Mask bit testing
// KORTEST/KTEST compare two masks and set EFLAGS; they produce no mask
// result, only the flags output.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// Width variants; prdW lets the W form require a stronger predicate
// (e.g. KTESTW needs DQI).
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3099
// Mask shift
// KSHIFTL/KSHIFTR shift a whole mask register by an 8-bit immediate.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 !strconcat(OpcodeStr,
                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
                 Sched<[sched]>;
}

// Width variants: W/B share opcode opc1, D/Q use opc2; B needs DQI and
// D/Q need BWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3128
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Without VLX the narrow compares are widened: the operands are inserted into
// an undef 512-bit register, the Z-form compare runs, and the resulting mask
// is narrowed back via COPY_TO_REGCLASS. The garbage in the upper elements is
// harmless because only the low mask bits are read back.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked variant: the narrow mask is widened to the Wide mask class and fed
// to the zero-masking "k" form of the compare.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
}
3152
// Same widening lowering as above, but for compares whose second operand is a
// broadcast load (the "rmib" forms). CommFrag/CommFrag_su handle the case
// where the broadcast appears as the FIRST operand; the condition code is
// rewritten by the fragment's OperandTransform so the register operand can
// stay in the $src1 slot.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     PatFrag CommFrag, PatFrag CommFrag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked broadcast-load compare.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                    (Narrow.VT Narrow.RC:$src1),
                                    cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;

// Masked, commuted with broadcast load.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                             (Narrow.VT Narrow.RC:$src1),
                                             cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
}
3195
// Same as above, but for fp types which don't use PatFrags.
// X86cmpm carries its condition code as a timm operand, so commuted broadcast
// forms go through X86cmpm_imm_commute instead of a fragment transform.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

// Masked register-register compare.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

// Masked broadcast load.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

// Masked, commuted with broadcast load.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}
3249
// Instantiate the widening compare patterns: D/Q element compares need only
// AVX512F (widened to 512-bit), while B/W element compares need BWI.
let Predicates = [HasAVX512, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3294
// Mask setting all 0s or 1s
// Pseudo-instructions that materialize an all-zeros/all-ones mask; they are
// rematerializable and as cheap as a move so the register allocator can
// recreate them instead of spilling.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}

// Only W/D/Q widths are defined; narrower masks reuse KSET0W/KSET1W below.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3312
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zero/all-one constants for v8i1 and narrower are produced by the
// 16-bit pseudo and then narrowed with a register-class copy.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3324
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0, both directions are pure register-class reinterpretations, so
// they lower to COPY_TO_REGCLASS with no actual data movement.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate every (narrow, wide) mask-class pair.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3360
3361//===----------------------------------------------------------------------===//
3362// AVX-512 - Aligned and unaligned load and store
3363//
3364
// Defines one width of an EVEX vector load/move: plain (rr/rm), merge-masked
// (rrk/rmk) and zero-masked (rrkz/rmkz) forms, plus masked_load patterns.
// NoRMPattern suppresses the rm selection pattern (callers provide their own),
// and SelectOprr lets a caller use a different select node for the register
// forms. EVEX2VEXOvrd names the VEX instruction used when compressing
// EVEX->VEX encodings.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masking register form: elements with a clear mask bit become zero.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masking forms tie $src0 to $dst: unselected elements keep their
  // previous value.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect _.KRCWM:$mask,
                          (_.VT (ld_frag addr:$src1)),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // Select masked loads onto the zero-masking / merge-masking memory forms;
  // an undef passthru is treated the same as a zero passthru.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3431
// Instantiate the aligned-load multiclass for all three vector lengths.
// The 512-bit (Z) form needs only the feature predicate 'prd'; the 256/128-bit
// (Z256/Z128) EVEX forms additionally require VLX. EVEX2VEXOvrd names the VEX
// instruction to compress to ("Y" suffix for the 256-bit variant).
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}
3450
// Unaligned-load counterpart of avx512_alignedload_vl. SelectOprr lets an
// instantiation disable the masked register-register pattern (pass null_frag)
// when another instruction already covers it.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3470
// Store forms of a vector move. Besides the memory stores this also defines
// the "_REV" register-register encodings (MRMDestReg, i.e. the store-direction
// opcode used between registers); they are isCodeGenOnly and linked back to
// the load-direction encodings via FoldGenData for unfolding/disassembly.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  // Unmasked store; the pattern can be suppressed with NoMRPattern when a
  // differently-typed variant already provides it.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked store (merge semantics in memory: unmasked lanes are untouched).
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  // Select the masked-store instruction for masked_store nodes.
  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  // ".s" assembler aliases force the store-direction (_REV) encodings.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
3525
// Unaligned-store variants for all vector lengths; mirrors avx512_load_vl.
// 512-bit needs only 'prd'; 256/128-bit additionally require VLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3543
// Aligned-store variants for all vector lengths; mirrors
// avx512_alignedload_vl (alignedstore/masked_store_aligned fragments).
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3562
// Floating-point full-vector moves: aligned (VMOVAPS/VMOVAPD) and unaligned
// (VMOVUPS/VMOVUPD), each combining the load and store multiclasses.
// The unaligned forms pass null_frag for SelectOprr so the masked
// reg-reg select pattern is only produced by the aligned instructions.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3586
// Integer full-vector moves. VMOVDQA32/VMOVDQU32 and the byte/word forms set
// NoRMPattern/NoMRPattern (=1) because the unmasked load/store patterns are
// provided by the 64-bit-element variants below (see the Pat blocks that map
// loadv*/storev* to VMOVDQ*64*); the byte/word forms also require BWI.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3626
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Store counterparts of the spill pseudos above.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}
3652
// Lower "select mask, 0, src" as a zero-masking move with the mask inverted
// (KNOT), since the masked-move instructions zero the lanes whose mask bit
// is CLEAR. The v8i1 case round-trips through VK16 because KNOTW operates on
// 16-bit mask registers.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3672
// Lower a masked select on a narrow (128/256-bit) vector by widening both
// operands into a wide (512-bit) register via INSERT_SUBREG, performing the
// masked move there, and extracting the narrow result back out. Used when
// the narrow EVEX forms are unavailable (no VLX).
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge-masking: widen both src1 and the pass-through src0.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zero-masking: the pass-through is all-zeros, so use the rrkz form.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}
3694
// Patterns for handling masked selects of 128/256-bit vectors when VLX isn't
// available: use a 512-bit operation and extract the narrow result.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte/word element selects need the BWI mask-move instructions.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3716
// Map unmasked 512-bit integer loads/stores of every element type onto the
// 64-bit-element instructions (the element size is irrelevant when no mask
// is used; see the NoRMPattern/NoMRPattern flags on the defm's above).
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3746
// Same mapping for the 128/256-bit EVEX forms, which require VLX.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3804
// Move Int Doubleword to Packed Double Int
// GPR <-> XMM transfers (vmovd/vmovq, opcodes 0x6E/0x7E).
let ExeDomain = SSEPackedInt in {
// GR32 -> low element of v4i32.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// i32 memory -> low element of v4i32.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> low element of v2i64.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form exists for disassembly only (no ISel pattern).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Bitcast moves between GR64 and the scalar FR64X class.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt
3839
// Move Int Doubleword to Single Scalar
// Bitcast GR32 -> FR32X; codegen-only helper for i32<->f32 bitconverts.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3848
// Move doubleword from xmm register to r/m32
// Extracts element 0 of a v4i32 to a GPR or to memory.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
3864
// Move quadword from xmm1 register to r/m64
// Extracts element 0 of a v2i64 to a GPR or to memory (0x7E/0xD6 forms).
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// 0x7E store form: disassembly only, no ISel pattern (the 0xD6 form below
// carries the store pattern).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// 0xD6 register form: disassembly only.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

// "vmovq.s" forces the store-direction (0xD6) register encoding.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
3903
// Move Scalar Single to Double Int
// Bitcast FR32X -> GR32; codegen-only helper for f32<->i32 bitconverts.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3913
// Move Quadword Int to Packed Quadword Int
// Load an i64 from memory into the low element of v2i64 (XS-prefixed vmovq).
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3930
3931//===----------------------------------------------------------------------===//
3932// AVX-512  MOVSS, MOVSD
3933//===----------------------------------------------------------------------===//
3934
// Scalar move (MOVSS/MOVSD style): merges the low element of $src2 into
// $src1, with optional k-masking on the low element. Defines reg-reg,
// load, and store forms plus their masked variants.
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _> {
  // Unmasked reg-reg form is only preferred when optimizing for size
  // (otherwise blends/other lowerings are used).
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masked reg-reg form.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked reg-reg form (pass-through tied to $dst).
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  // Scalar load; zero-extends into the full vector register (vzload_frag).
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  // Masked loads carry no ISel patterns; they are selected via the
  // lowering multiclasses below.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Scalar store from the FR32/FR64 register class.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked scalar store (pattern supplied by the store-lowering multiclasses).
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}
3996
// Instantiate the scalar moves: VMOVSS (f32, XS prefix) and
// VMOVSD (f64, XD prefix + VEX_W).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4002
4003
// Fold a scalar X86selects feeding a scalar-insert (OpNode = Movss/Movsd)
// into the masked forms of the scalar move instruction, converting the FRC
// scalar operands to full vector registers with COPY_TO_REGCLASS.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// Merge case: the select's false value becomes the tied pass-through.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

// Zero case: a zero false value selects the zero-masking (rrkz) form.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4028
// Lower a masked store of a 128-bit source widened (via insert_subvector
// into undef) to 512 bits, down to the masked scalar store InstrStr#mrk.
// Mask is the exact DAG shape codegen uses to materialize "bit 0 of
// MaskRC:$mask" as the store's vector mask.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}
4041
// Same as avx512_store_scalar_lowering, but for masks held in a sub-32-bit
// GPR class (GR8/GR16): the mask is first widened to i32 by inserting it
// into an IMPLICIT_DEF at the given subregister index, then re-classed to
// the VK1WM write-mask class.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}
4056
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
// Mask512 is the widened-mask DAG shape seen on AVX512F-only targets;
// Mask128 is the native 128-bit shape seen when AVX512VL is available.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {

// AVX512F pattern: the 128-bit source and mask have both been widened to
// 512 bits; strip the widening and emit the masked scalar store.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern: 128-bit masked store matched directly.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}
4081
// Lower a 512-bit-widened masked load whose result is immediately narrowed
// back to 128 bits (extract_subvector at index 0) to the masked scalar load
// forms: rmkz when the passthru is all-zeros, rmk when the passthru is the
// zero-extended low element of $src (X86vzmovl).
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// Zero passthru -> zero-masked load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

// vzmovl'd $src as passthru -> merge-masked load with $src as merge source.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
4104
// Same as avx512_load_scalar_lowering, but the mask lives in a sub-32-bit
// GPR class: widen it to i32 with INSERT_SUBREG into IMPLICIT_DEF before
// re-classing to VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Zero passthru -> zero-masked load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// vzmovl'd $src as passthru -> merge-masked load.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
4129
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
// Mask512 is the widened-mask DAG shape seen on AVX512F-only targets;
// Mask128 is the native 128-bit shape seen when AVX512VL is available.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns.
// Zero passthru -> zero-masked load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// vzmovl'd $src as passthru -> merge-masked load.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns: match the 128-bit masked load directly.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}
4170
// Instantiate the X86selects->masked-move lowering for VMOVSS/VMOVSD.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4173
// Masked scalar stores.  The mask DAGs are the shapes codegen uses to
// materialize "bit 0" of a mask held in GR32 (trunc+and), GR16 and GR8
// (and, with subregister widening) as a v16i1/v8i1 store mask.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4180
// Widened masked-store forms: the 512-bit mask operand is an i8 mask bit
// zero-inserted into a v16i1 (see avx512_store_scalar_lowering_subreg2 for
// the AVX512F vs. AVX512VL split).
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4205
// Masked scalar loads; mask shapes mirror the store instantiations above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4212
// Widened masked-load forms; mask shapes mirror the store subreg2
// instantiations above.
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4237
// Lower f32/f64 X86selects on bare scalar FP registers (or loads) to the
// masked VMOVSS/VMOVSD forms.  The scalar operands are bounced through
// VR128X via COPY_TO_REGCLASS since the instructions operate on full XMM
// registers; with zero as the false operand the kz (zero-masking) form is
// used, and an IMPLICIT_DEF stands in for the unused merge source.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// Select of a loaded f32 -> masked load with $src0 as merge source.
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                                                       VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

// f64 versions of the four patterns above.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                                                       VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4273
// Reverse-direction (MRMDestReg, opcode 0x11 "store" form) register-register
// encodings of VMOVSS/VMOVSD.  These have no patterns (isCodeGenOnly,
// hasSideEffects = 0) and exist so both encodings can be assembled and
// disassembled (ForceDisassemble); FoldGenData names the canonical forward
// form each one corresponds to for the fold-table generator.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masking form: $src0 is tied to $dst as the merge source.
  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  // Zero-masking form: no merge source needed.
  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  // vmovsd counterparts of the three defs above (XD prefix, VEX_W).
  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}
4326
// ".s" assembler aliases that force the reversed (_REV, store-form)
// encodings.  The trailing 0 marks these as assembly-only (never preferred
// for printing/disassembly).
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
4347
// When optimizing for size, lower X86vzmovl (move low element, zero upper
// elements) with VMOVSS from a zeroed register.  Wider types operate on the
// low 128 bits and rebuild the full-width register with SUBREG_TO_REG
// (upper bits implicitly zeroed).
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // 512-bit versions of the same.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}
4373
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// The blend immediates select the source's low element(s): 0b1 for one f32
// lane of VBLENDPS, 0b11 for the two i16 words of a dword in VPBLENDW.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}
4388
// Scalar FP loads placed into vectors: a plain scalar_to_vector load and the
// zero-extending X86vzload forms map to the unmasked VMOVSS/VMOVSD loads.
// Wider results use SUBREG_TO_REG; the upper bits are implicitly zeroed.
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
4409
// EVEX vmovq xmm, xmm: copies the low 64 bits and zeroes the upper bits of
// the destination (matches X86vzmovl on v2i64).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}
4418
// Integer zero-extending moves/loads into vectors, plus X86vzmovl on wider
// types lowered through the 128-bit vmovq (VMOVZPQILo2PQIZrr).
let Predicates = [HasAVX512] in {
  // GPR -> low vector element with upper elements zeroed.
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // 256/512-bit X86vzmovl: vmovq on the low 128 bits, then SUBREG_TO_REG
  // to rebuild the wide register (upper bits implicitly zeroed).
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}
4468
4469//===----------------------------------------------------------------------===//
4470// AVX-512 - Non-temporals
4471//===----------------------------------------------------------------------===//
4472
// 512-bit non-temporal aligned load.  No pattern here; the selection
// patterns live in the AddedComplexity = 400 blocks below.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4477
// 256/128-bit non-temporal aligned loads (require AVX512VL).  Patterns are
// in the HasVLX block further below.
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4491
// One non-temporal store instruction (register -> memory) for a single
// vector width.  AddedComplexity = 400 prioritizes the NT store over the
// ordinary aligned-store patterns.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4501
// Non-temporal store across all three vector widths: ZMM under AVX512F,
// YMM/XMM additionally gated on AVX512VL.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}
4513
// Non-temporal store instructions: integer (vmovntdq) and packed
// double/single FP (vmovntpd/vmovntps).
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
4520
// 512-bit non-temporal patterns.  The stores cover the integer element
// types VMOVNTDQZmr was not defined with (i64 is matched via the defm
// above); the loads all map to the single VMOVNTDQAZrm since NT loads are
// not typed per element.  AddedComplexity = 400 beats the regular
// aligned-load/store patterns.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4542
// 256/128-bit non-temporal patterns (AVX512VL), mirroring the 512-bit
// block above for each narrower width.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4584
4585//===----------------------------------------------------------------------===//
4586// AVX-512 - Integer arithmetic
4587//
// Maskable integer binop for one vector width: rr (register-register) and
// rm (register-memory) forms.  AVX512_maskable expands each into the plain,
// merge-masked (k), and zero-masked (kz) variants.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4605
// Extends avx512_binop_rm with the EVEX.b broadcast-from-memory form (rmb):
// a single scalar element is loaded and broadcast as the second operand.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4619
// Instantiates avx512_binop_rm at all three vector lengths. The 512-bit
// form (Z) requires only the base predicate 'prd'; the 256/128-bit forms
// (Z256/Z128) additionally require HasVLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4635
// Same as avx512_binop_rm_vl but built on avx512_binop_rmb, so each
// vector length also gets the broadcast ("rmb") form.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4651
// 64-bit element ("q") wrapper: i64 vectors with broadcast support,
// VEX.W set and 64-bit compressed-displacement scaling.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
4659
// 32-bit element ("d") wrapper: i32 vectors with broadcast support and
// 32-bit compressed-displacement scaling.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4666
// 16-bit element ("w") wrapper: i16 vectors, no broadcast form (EVEX
// embedded broadcast exists only for 32/64-bit elements); VEX.W is
// ignored (VEX_WIG).
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
4674
// 8-bit element ("b") wrapper: i8 vectors, no broadcast form; VEX.W is
// ignored (VEX_WIG).
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
4682
// Combined dword+qword instantiation: produces both the "d" (opc_d,
// mnemonic suffix "d") and "q" (opc_q, suffix "q") families.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                   IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                   IsCommutable>;
}
4692
// Combined byte+word instantiation: produces both the "b" (opc_b,
// mnemonic suffix "b") and "w" (opc_w, suffix "w") families.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                   IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                   IsCommutable>;
}
4702
// All four element sizes for ops like add/sub: the d/q forms gate on
// HasAVX512, while the b/w forms need HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
4713
// Two-operand op whose source and destination vector types differ
// (e.g. multishift-style ops). _Src describes the register operands,
// _Dst the result, and _Brdct the element type used by the broadcast
// memory form (the broadcast load is bitconverted to the source type).
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  // rr: register-register form.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  // rm: full-width memory operand loaded with the source type's LdFrag.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  // rmb: EVEX.B broadcast form; the scalar is broadcast at _Brdct's
  // element width, then bitconverted to the source vector type.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4745
// Plain add/sub across all element sizes.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
// Saturating add/sub, signed and unsigned, byte/word only (HasBWI).
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
// Low-half multiplies. Note VPMULLQ requires DQI and has no VEX
// equivalent to shrink to (NotEVEX2VEXConvertible).
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
// High-half word multiplies (signed, unsigned, rounded-scaled).
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
// Byte/word averages and widening 32x32->64 multiplies.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
4777
// Instantiates avx512_binop_rm2 at all three vector lengths. The
// broadcast operand is always an i64 vector of the matching width
// (v8i64/v4i64/v2i64), independent of the src/dst element types.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
4799
// VBMI multishift: i8 source and destination, with a qword (64-bit)
// broadcast memory form supplied by avx512_binop_all.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;
4803
// Broadcast ("rmb") form for pack-style ops where src and dst types
// differ: the scalar is broadcast at the source element width and
// bitconverted before the op.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Src.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4817
// Register-register and register-memory forms for pack-style ops with
// distinct source/destination types. CD8 scaling follows the source
// element size.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4838
// i32 -> i16 packs (vpackssdw/vpackusdw) at all vector lengths; since
// the source elements are 32-bit, the broadcast form is also available.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 packs (vpacksswb/vpackuswb) at all vector lengths. No
// broadcast form here: 16-bit elements have no EVEX embedded broadcast.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
4873
// vpmadd-style widening multiply-add ops: source and destination element
// types differ (given by _Src/_Dst), instantiated at all vector lengths
// under HasBWI (plus HasVLX for the 128/256-bit forms).
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
4890
// Pack instructions (signed/unsigned saturation, dword->word and
// word->byte).
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

// Widening multiply-add: i8->i16 (vpmaddubsw) and i16->i32 (vpmaddwd).
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4900
// Integer min/max. b/w forms gate on HasBWI, d/q on HasAVX512; the
// 64-bit forms have no VEX equivalent (NotEVEX2VEXConvertible).
// Signed max.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// Unsigned max.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// Signed min.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// Unsigned min.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
4940
// PMULLQ: when DQI is available but VLX is not, implement the 128/256-bit
// i64 multiplies by widening to the 512-bit VPMULLQZ instruction: insert
// the operands into an undef ZMM, run the 512-bit op, and extract the
// original subregister.
let Predicates = [HasDQI, NoVLX] in {
  // 256-bit register-register form.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  // 256-bit broadcast form: the broadcast memory operand is passed
  // straight through to the 512-bit rmb instruction.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  // 128-bit register-register form.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  // 128-bit broadcast form.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}
4969
// Widen 128/256-bit i64 min/max to the 512-bit instruction named by
// 'Instr' (same insert/extract trick as the PMULLQ patterns above).
// Instr is the Z-suffixed base name; "rr"/"rmb" are appended to pick
// the register and broadcast forms.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit register-register.
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  // 256-bit with a 64-bit broadcast operand.
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  // 128-bit register-register.
  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  // 128-bit with a 64-bit broadcast operand.
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}
4997
// Without VLX, lower narrow i64 min/max through the 512-bit forms.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
5004
5005//===----------------------------------------------------------------------===//
5006// AVX-512  Logical Instructions
5007//===----------------------------------------------------------------------===//
5008
// Bitwise logic exists only in d/q element flavors (the d and q forms
// share the same opcode byte); byte/word uses are bitcast to these via
// the patterns below.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
5017
// Select the quadword logic instructions for i8/i16 vector logic ops,
// since VPAND/VPOR/VPXOR/VPANDN are only defined for d/q element types.
// Bitwise ops are element-size agnostic, so no bitcasts are needed in
// the output patterns.
let Predicates = [HasVLX] in {
  // 128-bit register-register forms.
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  // 128-bit forms with a folded memory operand.
  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  // 256-bit register-register forms.
  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  // 256-bit forms with a folded memory operand.
  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}
5099
// 512-bit analogues of the patterns above: i8/i16 logic ops map onto
// the quadword VP*QZ instructions.
let Predicates = [HasAVX512] in {
  // Register-register forms.
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  // Forms with a folded memory operand.
  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}
5141
// Patterns to catch a masking vselect whose element type differs from
// the logic op's: the logic result is bitcast to the select type, and
// we match the whole thing onto the masked instruction variants
// (rrk/rrkz for registers, rmk/rmkz for memory).
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                    X86VectorVTInfo _,
                                    X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  // Merge-masking: unselected lanes come from $src0.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  // Zero-masking: unselected lanes are zeroed.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}
5173
// Broadcast counterpart of avx512_logical_lowering: matches the masked
// vselect over a logic op whose second operand is a broadcast load,
// onto the rmbk/rmbkz instruction variants.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  // Merge-masking form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  // Zero-masking form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
5193
// Instantiates avx512_logical_lowering for all three vector widths of a
// select-type/integer-type pairing.  The 128/256-bit patterns are gated on
// HasVLX; the 512-bit patterns only need HasAVX512.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}
5208
// Same width expansion as avx512_logical_lowering_sizes, but for the
// broadcast-load patterns (avx512_logical_lowering_bcast).
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}
5223
// Expands the masked-logical lowering patterns across every combination of
// vselect element type (i64/i32/f32/f64) and integer logic-op element type.
// The "Q"-suffixed (64-bit element) instruction is used for i64/f64 selects
// and the "D"-suffixed (32-bit element) instruction for i32/f32 selects.
// Broadcast patterns are only emitted for the matching-width float cases.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  // Broadcast forms: f32 select with i32 logic op, f64 select with i64.
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}
5268
// Instantiate the masked lowering patterns for the four EVEX bitwise-logic
// instruction families.
defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR",  or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5273
5274//===----------------------------------------------------------------------===//
5275// AVX-512  FP arithmetic
5276//===----------------------------------------------------------------------===//
5277
// Scalar FP binop (e.g. vaddss/vaddsd).  Emits:
//  - rr_Int/rm_Int: maskable intrinsic forms operating on full XMM (_.RC),
//    selected via VecNode.
//  - rr/rm: isCodeGenOnly forms on the scalar FRC register class that
//    pattern-match the plain scalar SDNode (OpNode); these carry the
//    IsCommutable flag.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
5311
// Embedded-rounding variant of a scalar FP binop: adds the rrb_Int form
// taking a static rounding-mode operand ($rc), encoded via EVEX.B + EVEX.RC.
// NOTE(review): the IsCommutable parameter is not referenced in this body;
// it appears to be kept for signature parity with the other scalar
// multiclasses.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP binop with a suppress-all-exceptions ({sae}) form instead of
// embedded rounding (used by vmin/vmax).  Mirrors avx512_fp_scalar's
// rr_Int/rm_Int/rr/rm forms and adds rrb_Int selected via SaeNode with
// EVEX.B set.  EVEX2VexOvrd names the VEX instruction the isCodeGenOnly
// forms may be compressed to by the EVEX->VEX pass.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]>,
                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>,
                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }

  // {sae} form: exceptions suppressed, selected via SaeNode.
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}
5366
// Instantiates the SS (f32, XS prefix) and SD (f64, XD + VEX.W) variants of
// a scalar FP binop together with its embedded-rounding form.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
5381
// SS/SD instantiation for scalar FP binops that use a {sae} form instead of
// embedded rounding (vmin/vmax).  NAME#"SS"/NAME#"SD" feed the EVEX->VEX
// override used by the isCodeGenOnly scalar forms.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
                              NAME#"SS">,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
                              NAME#"SD">,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic instances.  add/mul are commutable; sub/div are not.
// min/max use the {sae} variant and are not commutable here (they are not
// commutative w.r.t. NaN/signed-zero semantics).
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;
5406
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
// (Note: "comutable" in the multiclass name is a historical typo kept for
// compatibility with its existing uses.)
// Emits only isCodeGenOnly scalar-register forms; isCommutable is
// unconditionally set on the rr form.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched,
                                    string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
// Commutable (unsafe-fp-math) scalar min/max instances for f32 and f64.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>;
5447
// Packed FP binop for one vector width: maskable rr, rm (full-width load)
// and rmb (broadcast-load, EVEX.B) forms.  IsKCommutable controls
// commutability of the masked forms independently of the unmasked one.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}
5475
// Packed FP binop with embedded rounding: register-only rrb form carrying a
// static rounding-mode operand ($rc), encoded via EVEX.B + EVEX.RC.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
5486
// Packed FP binop with suppress-all-exceptions: register-only rrb form with
// a {sae} modifier in the asm string, encoded via EVEX.B.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
5497
// Full packed FP binop family: PS/PD at 512-bit (gated on prd) plus 128/256
// variants when VLX is also available.  IsPD128Commutable lets the 128-bit
// PD form differ in commutability from the rest of the family.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

    // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
5528
// Embedded-rounding forms for the 512-bit PS/PD packed binops (rounding is
// only available at 512-bit vector length).
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5538
// {sae} forms for the 512-bit PS/PD packed binops (exception suppression is
// only available at 512-bit vector length).
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5548
// Packed FP arithmetic instances.  add/mul/sub/div also get the 512-bit
// embedded-rounding forms; min/max get {sae} forms instead.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable (unsafe-fp-math) packed min/max, codegen-only.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
// FP-domain logic ops (DQI).  null_frag: selection is done elsewhere.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
5581
// Packed scalef (vscalefps/pd) for one vector width: maskable rr, rm and
// broadcast rmb forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5603
// Scalar scalef (vscalefss/sd): maskable intrinsic rr/rm forms only (no
// isCodeGenOnly FRC forms, unlike avx512_fp_scalar).
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5619
// Full scalef family: packed PS/PD at 512-bit with embedded-rounding forms,
// scalar SS/SD (opcScaler) with rounding forms, and VLX-gated 128/256-bit
// packed variants (no rounding at those widths).
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
                              EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// vscalef: packed opcode 0x2C, scalar opcode 0x2D; EVEX-only (no VEX form).
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5651
5652//===----------------------------------------------------------------------===//
5653// AVX-512  VPTESTM instructions
5654//===----------------------------------------------------------------------===//
5655
// vptestm/vptestnm for one vector width: mask-producing (_.KRC result)
// rr/rm forms.  Declared with null_frag patterns on purpose; see NOTE.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5676
// Broadcast-memory (rmb) form of vptestm/vptestnm; like avx512_vptest,
// patterns are null_frag and selection happens in X86ISelDAGToDAG.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5688
// D/Q-element vptest widths: 512-bit on AVX512F, 128/256-bit on VLX.
// Broadcast (rmb) forms exist at every width since d/q elements support
// embedded broadcast.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}
5703
// Expands the d (i32) and q (i64, VEX.W) element-size variants.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                 avx512vl_i64_info>, VEX_W;
}
5711
// w (i16) and b (i8) element-size variants; these require BWI (512-bit) or
// BWI+VLX (128/256-bit) and have no broadcast forms (byte/word elements do
// not support embedded broadcast).
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                            v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }
}
5732
// Combines the b/w (opc_wb) and d/q (opc_dq) variants into one family.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5737
// VPTESTM and VPTESTNM share the same opcode bytes (0x26 for b/w, 0x27 for
// d/q); they are distinguished only by the mandatory prefix, 66 (T8PD) for
// the "test" form vs. F3 (T8XS) for the "test-not" form.
defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                         SchedWriteVecLogic>, T8PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                         SchedWriteVecLogic>, T8XS;
5742
5743//===----------------------------------------------------------------------===//
5744// AVX-512  Shift instructions
5745//===----------------------------------------------------------------------===//
5746
// Shift/rotate by an 8-bit immediate: register-source (ri) and full-width
// memory-source (mi) forms, both maskable (zeroing/merging via the k-mask
// variants generated by AVX512_maskable).
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}
5764
// Shift/rotate by immediate with a broadcast memory source (mbi): the
// scalar at $src1 is broadcast to every element before the operation
// (EVEX.b set via EVEX_B).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
     EVEX_B, Sched<[sched.Folded]>;
}
5775
// Shift by a count vector held in an XMM register or loaded from a 128-bit
// memory location (the classic PSLLx/PSRLx/PSRAx form). The count operand
// is always 128-bit regardless of the destination vector length.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
   // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5794
// Instantiate the xmm-count shift for all three vector lengths (512 under
// prd; 256/128 additionally under VLX). Because the memory count operand is
// always 128-bit, the EVEX compressed-displacement tuple shrinks relative to
// the vector length: quarter-vector (CD8VQ) at 512, half (CD8VH) at 256,
// full (CD8VF) at 128.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5812
// D/Q/W element-size variants of an xmm-count shift, each with its own
// opcode byte. The W form requires BWI. The Q form can be flagged as not
// convertible from EVEX to VEX (needed when no VEX qword encoding exists,
// e.g. VPSRAQ).
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}
5825
// Immediate-count shift/rotate (register, memory and broadcast-memory
// forms) for 512/256/128-bit vectors; the narrower lengths require VLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}
5846
// Word-element immediate shifts: BWI-only, no broadcast form (words cannot
// be broadcast by these instructions), and EVEX.W is ignored (VEX_WIG).
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}
5860
// Dword/qword immediate shifts/rotates with per-size opcode bytes. The Q
// form adds VEX_W and can be flagged not EVEX->VEX convertible (set by
// callers such as VPSRA whose qword immediate form has no VEX encoding).
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
5872
// Immediate-count shifts. The ModRM /r field selects the operation
// (MRM2 = srl, MRM4 = sra, MRM6 = sll) within the shared 0x71/0x72/0x73
// opcode group. VPSRA's qword form reuses opcode 0x72 (not 0x73) and is
// marked not EVEX->VEX convertible (NotEVEX2VEXConvertibleQ = 1) since
// VEX VPSRAQ does not exist.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// Immediate-count rotates (AVX-512 only; /0 = ror, /1 = rol of opcode 0x72).
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

// Shifts where the count comes from the low element of an XMM register
// (or 128-bit memory). VPSRA is again flagged not EVEX->VEX convertible
// for the qword form.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
5899
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Strategy: widen the 128/256-bit source into a 512-bit register via
// INSERT_SUBREG of an IMPLICIT_DEF, run the ZMM instruction, then extract
// the low subregister. The garbage in the upper lanes is harmless because
// only the low lanes are extracted.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 timm:$src2)), sub_xmm)>;
}
5926
5927//===-------------------------------------------------------------------===//
5928// Variable Bit Shifts
5929//===-------------------------------------------------------------------===//
5930
// Per-element variable shift: the count vector has the same type as the
// data (VPSLLV/VPSRLV/VPSRAV style). Register (rr) and full-width memory
// (rm) forms, both maskable.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5948
// Broadcast-memory form of the per-element variable shift: the scalar
// count at $src2 is broadcast to all elements (EVEX.b set).
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5960
// Variable shift (rr/rm/rmb forms) for all three vector lengths; 256/128-bit
// forms require VLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}
5974
// Dword and qword element-size variants of a variable shift; the qword
// form adds VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}
5982
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens each operand into a ZMM register (INSERT_SUBREG of IMPLICIT_DEF),
// applies the Zrr instruction named OpcodeStr#"Zrr", then extracts the low
// subregister of the result.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element variable shifts (VPSLLVW/VPSRLVW/VPSRAVW): BWI-only, no
// broadcast form for word elements.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}
6017
// Variable per-element shifts (d/q via avx512_var_shift_types, w via
// avx512_var_shift_w) and variable rotates (d/q only).
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

// NoVLX lowerings: implement the 128/256-bit forms with the 512-bit
// instruction where VLX (or a VEX equivalent) is unavailable.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6034
6035
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widen/operate/extract approach as the VPSRAQ patterns above.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count rotate-left via the 512-bit VPROLI forms.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        timm:$src2)), sub_ymm)>;
}
6086
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror image of the VPROL patterns above, for rotate-right.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate-count rotate-right via the 512-bit VPRORI forms.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        timm:$src2)), sub_ymm)>;
}
6137
6138//===-------------------------------------------------------------------===//
6139// 1-src variable permutation VPERMW/D/Q
6140//===-------------------------------------------------------------------===//
6141
// Cross-lane permute with a variable control vector (reuses the var_shift
// multiclasses for the rr/rm/rmb forms). Only 512- and 256-bit variants are
// instantiated; there is no 128-bit form here.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}
6152
// Cross-lane permute with an immediate control byte (reuses the shift_rmi
// multiclasses for the ri/mi/mbi forms). 512- and 256-bit variants only.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
6167
// Byte/word variable permute (VPERMB/VPERMW): gated on the given feature
// predicate (VBMI or BWI); no broadcast forms for these element sizes.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}
6181
// VPERMW/VPERMB share opcode 0x8D, distinguished by feature (BWI vs. VBMI)
// and VEX_W. Likewise VPERMD/VPERMQ share 0x36 and VPERMPS/VPERMPD share
// 0x16, distinguished by VEX_W.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

// Immediate-control forms of VPERMQ/VPERMPD (qword element sizes only).
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6202
6203//===----------------------------------------------------------------------===//
6204// AVX-512 - VPERMIL
6205//===----------------------------------------------------------------------===//
6206
// In-lane permute with a variable control vector (VPERMILPS/PD variable
// form): register (rr), full-width memory (rm), and broadcast (rmb) forms.
// The control vector's VT (Ctrl) is the integer counterpart of the data VT.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
6234
// Variable-control VPERMILP* for all three vector lengths; 128/256-bit
// forms require VLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6250
// Full VPERMILP* family: both the variable-control (OpcVar) and
// immediate-control (OpcImm) encodings, under the same NAME.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
6259
// VPERMILPS (imm 0x04 / var 0x0C) and VPERMILPD (imm 0x05 / var 0x0D);
// the PD form uses VEX_W1X (W=1 in EVEX, W ignored in the VEX equivalent).
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
6266
6267//===----------------------------------------------------------------------===//
6268// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6269//===----------------------------------------------------------------------===//
6270
// All three share opcode 0x70; they differ in mandatory prefix:
// 66 (BIi8) = VPSHUFD, F3 (XSi8) = VPSHUFHW, F2 (XDi8) = VPSHUFLW.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
6280
6281//===----------------------------------------------------------------------===//
6282// AVX-512 - VPSHUFB
6283//===----------------------------------------------------------------------===//
6284
// Byte shuffle (VPSHUFB): BWI-only, reuses avx512_var_shift for the rr/rm
// forms; no broadcast form for byte elements.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                              EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}
6298
// VPSHUFB; EVEX.W is ignored (VEX_WIG).
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;
6301
6302//===----------------------------------------------------------------------===//
6303// Move Low to High and High to Low packed FP Instructions
6304//===----------------------------------------------------------------------===//
6305
// VMOVLHPS: dst = low 64 bits of $src1 : low 64 bits of $src2.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// VMOVHLPS: dst = high 64 bits of $src2 : low-half handling per X86Movhlps.
// Marked commutable and NotMemoryFoldable.
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6317
6318//===----------------------------------------------------------------------===//
6319// VMOVHPS/PD VMOVLPS Instructions
6320// All patterns were taken from the SSE implementation.
6321//===----------------------------------------------------------------------===//
6322
6323// Load form of vmovhps/vmovhpd/vmovlps/vmovlpd: load a 64-bit scalar from
// memory and merge it with $src1 via OpNode (null_frag for the PS variants,
// which get no ISel pattern — see the note at the instantiations below).
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6324                                  SDPatternOperator OpNode,
6325                                  X86VectorVTInfo _> {
6326  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6327  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6328                  (ins _.RC:$src1, f64mem:$src2),
6329                  !strconcat(OpcodeStr,
6330                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6331                  [(set _.RC:$dst,
6332                     (OpNode _.RC:$src1,
                       // Pattern matches a scalar f64 load widened to v2f64,
                       // then bitcast to the instruction's vector type.
6333                       (_.VT (bitconvert
6334                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6335                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6336}
6337
6338// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6339// SSE1. And MOVLPS pattern is even more complex.
6340// PS forms use null_frag (no selection pattern); PD forms select via
// X86Unpckl (vmovhpd) and X86Movsd (vmovlpd).
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6341                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6342defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6343                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6344defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6345                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6346defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6347                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6348
6349// Extra load-folding patterns: match unpckl/movsd whose second operand is a
// 64-bit load (plain or zero-extending) and select the memory forms above.
let Predicates = [HasAVX512] in {
6350  // VMOVHPD patterns
6351  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6352                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6353           (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6354  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6355            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

6356  // VMOVLPD patterns
6357  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6358            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6359}
6361
6362// Store forms (opcode 0x17 = high half, 0x13 = low half).  The PS variants
// carry no patterns (mayStore/hasSideEffects only); the PD variants store the
// selected f64 element directly.
let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
6363def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6364                       (ins f64mem:$dst, VR128X:$src),
6365                       "vmovhps\t{$src, $dst|$dst, $src}",
6366                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// vmovhpd store: element 0 of (unpckh $src, $src) is the high f64 of $src.
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6367                       (ins f64mem:$dst, VR128X:$src),
6368                       "vmovhpd\t{$src, $dst|$dst, $src}",
6369                       [(store (f64 (extractelt
6370                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6371                                     (iPTR 0))), addr:$dst)]>,
6372                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
6373def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6374                       (ins f64mem:$dst, VR128X:$src),
6375                       "vmovlps\t{$src, $dst|$dst, $src}",
6376                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// vmovlpd store: low f64 element of $src.
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6377                       (ins f64mem:$dst, VR128X:$src),
6378                       "vmovlpd\t{$src, $dst|$dst, $src}",
6379                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6380                                     (iPTR 0))), addr:$dst)]>,
6381                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
6387
6388// Storing element 0 of (vpermilpd $src, 1) is storing the high f64 of $src,
// which is exactly what vmovhpd's store form does.
let Predicates = [HasAVX512] in {
6389  // VMOVHPD patterns
6390  def : Pat<(store (f64 (extractelt
6391                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6392                           (iPTR 0))), addr:$dst),
6393           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6394}
6394}
6395//===----------------------------------------------------------------------===//
6396// FMA - Fused Multiply Operations
6397//
6398
6399// Packed FMA, 213 form (node operands: src2, src1, src3).  Emits the
// register-register (r), full-vector memory (m) and broadcast-memory (mb)
// variants; $src1 is tied to $dst.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6400                               X86FoldableSchedWrite sched,
6401                               X86VectorVTInfo _, string Suff> {
6402  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // Register-register form.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6403          (ins _.RC:$src2, _.RC:$src3),
6404          OpcodeStr, "$src3, $src2", "$src2, $src3",
6405          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6406          AVX512FMA3Base, Sched<[sched]>;

6407  // Full-vector load form ($src3 from memory).
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6408          (ins _.RC:$src2, _.MemOp:$src3),
6409          OpcodeStr, "$src3, $src2", "$src2, $src3",
6410          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6411          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

6412  // Broadcast load form (EVEX.b set, scalar element splatted).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6413            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6414            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6415            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6416            (OpNode _.RC:$src2,
6417             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6418             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6419  }
6420}
6424
6425// Static-rounding (SAE/RC) variant of the 213 form: register-register only,
// with an explicit rounding-control operand; EVEX.b selects rounding mode.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6426                                 X86FoldableSchedWrite sched,
6427                                 X86VectorVTInfo _, string Suff> {
6428  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6429  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6430          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6431          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6432          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6433          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6434}
6435
6436// Instantiates the 213 form across widths: 512-bit (plus the rounding
// variant, which only exists at 512 bits) under HasAVX512, and the 256/128-bit
// forms under HasVLX.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6437                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
6438                                   AVX512VLVectorVTInfo _, string Suff> {
6439  let Predicates = [HasAVX512] in {
6440    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6441                                      _.info512, Suff>,
6442                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6443                                        _.info512, Suff>,
6444                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6445  }
6446  let Predicates = [HasVLX, HasAVX512] in {
6447    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6448                                    _.info256, Suff>,
6449                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6450    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6451                                    _.info128, Suff>,
6452                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6453  }
6454}
6455
6456// Expands the 213 form to both element types: PS (f32) and PD (f64, VEX_W).
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6457                              SDNode OpNodeRnd> {
6458    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6459                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
6460    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6461                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
6462                                      VEX_W;
6463}
6464
6465// The six packed 213-form FMA families (opcodes 0xA6-0xAE).
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6466defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6467defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6468defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6469defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6470defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6471
6472
6473// Packed FMA, 231 form (node operands: src2, src3, src1 — the accumulator is
// the tied $src1).  Same r/m/mb structure as the 213 multiclass above.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6474                               X86FoldableSchedWrite sched,
6475                               X86VectorVTInfo _, string Suff> {
6476  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // Register-register form.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6477          (ins _.RC:$src2, _.RC:$src3),
6478          OpcodeStr, "$src3, $src2", "$src2, $src3",
6479          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6480          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

6481  // Full-vector load form.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6482          (ins _.RC:$src2, _.MemOp:$src3),
6483          OpcodeStr, "$src3, $src2", "$src2, $src3",
6484          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6485          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

6486  // Broadcast load form (EVEX.b set).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6487         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6488         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6489         "$src2, ${src3}"##_.BroadcastStr,
6490         (_.VT (OpNode _.RC:$src2,
6491                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6492                      _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6493         Sched<[sched.Folded, sched.ReadAfterFold]>;
6494  }
6495}
6499
6500// Static-rounding variant of the 231 form (register-register with an
// explicit rounding-control operand).
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6501                                 X86FoldableSchedWrite sched,
6502                                 X86VectorVTInfo _, string Suff> {
6503  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6504  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6505          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6506          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6507          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6508          1, 1, vselect, 1>,
6509          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6510}
6511
6512// Instantiates the 231 form across widths: 512-bit (plus rounding variant)
// under HasAVX512; 256/128-bit under HasVLX.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6513                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
6514                                   AVX512VLVectorVTInfo _, string Suff> {
6515  let Predicates = [HasAVX512] in {
6516    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6517                                      _.info512, Suff>,
6518                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6519                                        _.info512, Suff>,
6520                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6521  }
6522  let Predicates = [HasVLX, HasAVX512] in {
6523    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6524                                    _.info256, Suff>,
6525                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6526    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6527                                    _.info128, Suff>,
6528                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6529  }
6530}
6531
6532// Expands the 231 form to both element types: PS (f32) and PD (f64, VEX_W).
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6533                              SDNode OpNodeRnd > {
6534    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6535                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
6536    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6537                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
6538                                      VEX_W;
6539}
6540
6541// The six packed 231-form FMA families (opcodes 0xB6-0xBE).
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6542defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6543defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6544defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6545defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6546defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6547
6548// Packed FMA, 132 form.  The register pattern uses operand order
// (src1, src3, src2); the memory/broadcast patterns are written in 312 order
// on purpose — see the inline comments.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6549                               X86FoldableSchedWrite sched,
6550                               X86VectorVTInfo _, string Suff> {
6551  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // Register-register form.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6552          (ins _.RC:$src2, _.RC:$src3),
6553          OpcodeStr, "$src3, $src2", "$src2, $src3",
6554          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6555          AVX512FMA3Base, Sched<[sched]>;

6556  // Pattern is 312 order so that the load is in a different place from the
6557  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6558          (ins _.RC:$src2, _.MemOp:$src3),
6559          OpcodeStr, "$src3, $src2", "$src2, $src3",
6560          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6561          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

6562  // Pattern is 312 order so that the load is in a different place from the
6563  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6564         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6565         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6566         "$src2, ${src3}"##_.BroadcastStr,
6567         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6568                       _.RC:$src1, _.RC:$src2)), 1, 0>,
6569         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6570  }
6571}
6577
6578// Static-rounding variant of the 132 form (register-register with an
// explicit rounding-control operand).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6579                                 X86FoldableSchedWrite sched,
6580                                 X86VectorVTInfo _, string Suff> {
6581  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6582  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6583          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6584          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6585          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6586          1, 1, vselect, 1>,
6587          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6588}
6589
6590// Instantiates the 132 form across widths: 512-bit (plus rounding variant)
// under HasAVX512; 256/128-bit under HasVLX.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6591                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
6592                                   AVX512VLVectorVTInfo _, string Suff> {
6593  let Predicates = [HasAVX512] in {
6594    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6595                                      _.info512, Suff>,
6596                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6597                                        _.info512, Suff>,
6598                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6599  }
6600  let Predicates = [HasVLX, HasAVX512] in {
6601    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6602                                    _.info256, Suff>,
6603                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6604    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6605                                    _.info128, Suff>,
6606                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6607  }
6608}
6609
6610// Expands the 132 form to both element types: PS (f32) and PD (f64, VEX_W).
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6611                              SDNode OpNodeRnd > {
6612    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6613                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
6614    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6615                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
6616                                      VEX_W;
6617}
6618
6619// The six packed 132-form FMA families (opcodes 0x96-0x9E).
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6620defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6621defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6622defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6623defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6624defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6625
6626// Scalar FMA
6627// Scalar FMA building block.  Emits the intrinsic (_Int) register, memory and
// rounding forms with null_frag patterns (they are matched by the explicit
// Pats in avx512_scalar_fma_patterns further below), plus isCodeGenOnly
// FRC-register forms whose patterns (RHS_r/RHS_m/RHS_b) are supplied by the
// caller.  MaskOnlyReg suppresses the register/rounding patterns.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6628                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6629let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  // Intrinsic register form; no pattern here (see note above).
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6630          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6631          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6632          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

6633  // Intrinsic memory form.
  let mayLoad = 1 in
6634  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6635          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6636          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6637          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

6638  // Intrinsic static-rounding form (explicit $rc operand, EVEX.b).
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6639         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6640         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6641         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

6642  // Scalar-register (FRC) forms used only for pattern matching.
  let isCodeGenOnly = 1, isCommutable = 1 in {
6643    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6644                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6645                     !strconcat(OpcodeStr,
6646                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6647                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
6648    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6649                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6650                    !strconcat(OpcodeStr,
6651                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6652                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

6653    def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6654                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6655                     !strconcat(OpcodeStr,
6656                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6657                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6658                     Sched<[SchedWriteFMA.Scl]>;
6659  }// isCodeGenOnly = 1
6660}// Constraints = "$src1 = $dst"
6661}
6667
6668// Instantiates all three scalar FMA operand orders (213/231/132) for one
// element type.  The RHS dags passed down are the patterns for the
// isCodeGenOnly FRC-register forms.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6669                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6670                            X86VectorVTInfo _, string SUFF> {
6671  let ExeDomain = _.ExeDomain in {
6672  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6673                // Operands for intrinsic are in 123 order to preserve passthru
6674                // semantics.
6675                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6676                         _.FRC:$src3))),
6677                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6678                         (_.ScalarLdFrag addr:$src3)))),
6679                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6680                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

6681  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6682                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6683                                          _.FRC:$src1))),
6684                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6685                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6686                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6687                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

6688  // One pattern is 312 order so that the load is in a different place from the
6689  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6690  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6691                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6692                         _.FRC:$src2))),
6693                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6694                                 _.FRC:$src1, _.FRC:$src2))),
6695                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6696                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
6697  }
6698}
6701
6702// Top-level scalar FMA multiclass: instantiates the SS (f32) and SD (f64,
// VEX_W) flavors of all three operand orders.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6703                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6704  let Predicates = [HasAVX512] in {
6705    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6706                                 OpNodeRnd, f32x_info, "SS">,
6707                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6708    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6709                                 OpNodeRnd, f64x_info, "SD">,
6710                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6711  }
6712}
6713
6714// Scalar FMA families; each defm expands to the 213/231/132 opcodes listed.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
6715defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6716defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6717defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
6718
6719multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6720                                      string Suffix, SDNode Move,
6721                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
6722  let Predicates = [HasAVX512] in {
6723    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6724                (Op _.FRC:$src2,
6725                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6726                    _.FRC:$src3))))),
6727              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6728               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6729               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6730
6731    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6732                (Op _.FRC:$src2, _.FRC:$src3,
6733                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6734              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6735               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6736               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6737
6738    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6739                (Op _.FRC:$src2,
6740                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6741                    (_.ScalarLdFrag addr:$src3)))))),
6742              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6743               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6744               addr:$src3)>;
6745
6746    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6747                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6748                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6749              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6750               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6751               addr:$src3)>;
6752
6753    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6754                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6755                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6756              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6757               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6758               addr:$src3)>;
6759
6760    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6761               (X86selects VK1WM:$mask,
6762                (Op _.FRC:$src2,
6763                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6764                    _.FRC:$src3),
6765                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6766              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6767               VR128X:$src1, VK1WM:$mask,
6768               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6769               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6770
6771    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6772               (X86selects VK1WM:$mask,
6773                (Op _.FRC:$src2,
6774                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6775                    (_.ScalarLdFrag addr:$src3)),
6776                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6777              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6778               VR128X:$src1, VK1WM:$mask,
6779               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6780
6781    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6782               (X86selects VK1WM:$mask,
6783                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6784                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6785                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6786              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6787               VR128X:$src1, VK1WM:$mask,
6788               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6789
6790    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6791               (X86selects VK1WM:$mask,
6792                (Op _.FRC:$src2, _.FRC:$src3,
6793                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6794                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6795              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6796               VR128X:$src1, VK1WM:$mask,
6797               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6798               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6799
6800    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6801               (X86selects VK1WM:$mask,
6802                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6803                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6804                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6805              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6806               VR128X:$src1, VK1WM:$mask,
6807               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6808
6809    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6810               (X86selects VK1WM:$mask,
6811                (Op _.FRC:$src2,
6812                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6813                    _.FRC:$src3),
6814                (_.EltVT ZeroFP)))))),
6815              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6816               VR128X:$src1, VK1WM:$mask,
6817               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6818               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6819
6820    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821               (X86selects VK1WM:$mask,
6822                (Op _.FRC:$src2, _.FRC:$src3,
6823                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6824                (_.EltVT ZeroFP)))))),
6825              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6826               VR128X:$src1, VK1WM:$mask,
6827               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6828               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6829
6830    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831               (X86selects VK1WM:$mask,
6832                (Op _.FRC:$src2,
6833                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6834                    (_.ScalarLdFrag addr:$src3)),
6835                (_.EltVT ZeroFP)))))),
6836              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6837               VR128X:$src1, VK1WM:$mask,
6838               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6839
6840    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6841               (X86selects VK1WM:$mask,
6842                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843                    _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6844                (_.EltVT ZeroFP)))))),
6845              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6846               VR128X:$src1, VK1WM:$mask,
6847               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6848
6849    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6850               (X86selects VK1WM:$mask,
6851                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6852                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6853                (_.EltVT ZeroFP)))))),
6854              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6855               VR128X:$src1, VK1WM:$mask,
6856               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6857
6858    // Patterns with rounding mode.
6859    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6860                (RndOp _.FRC:$src2,
6861                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6862                       _.FRC:$src3, (i32 timm:$rc)))))),
6863              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6864               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6865               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6866
6867    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6868                (RndOp _.FRC:$src2, _.FRC:$src3,
6869                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6870                       (i32 timm:$rc)))))),
6871              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6872               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6873               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6874
6875    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6876               (X86selects VK1WM:$mask,
6877                (RndOp _.FRC:$src2,
6878                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6879                       _.FRC:$src3, (i32 timm:$rc)),
6880                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6881              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6882               VR128X:$src1, VK1WM:$mask,
6883               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6884               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6885
6886    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6887               (X86selects VK1WM:$mask,
6888                (RndOp _.FRC:$src2, _.FRC:$src3,
6889                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6890                       (i32 timm:$rc)),
6891                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6892              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6893               VR128X:$src1, VK1WM:$mask,
6894               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6895               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6896
6897    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6898               (X86selects VK1WM:$mask,
6899                (RndOp _.FRC:$src2,
6900                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6901                       _.FRC:$src3, (i32 timm:$rc)),
6902                (_.EltVT ZeroFP)))))),
6903              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6904               VR128X:$src1, VK1WM:$mask,
6905               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6906               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6907
6908    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6909               (X86selects VK1WM:$mask,
6910                (RndOp _.FRC:$src2, _.FRC:$src3,
6911                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6912                       (i32 timm:$rc)),
6913                (_.EltVT ZeroFP)))))),
6914              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6915               VR128X:$src1, VK1WM:$mask,
6916               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6917               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6918  }
6919}
6920
// Instantiate the scalar FMA pattern set for each of the four FMA operations
// (fmadd/fmsub/fnmadd/fnmsub), once for single precision (SS, v4f32 with
// movss insertion) and once for double precision (SD, v2f64 with movsd
// insertion).
defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
6938
6939//===----------------------------------------------------------------------===//
6940// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6941//===----------------------------------------------------------------------===//
// IFMA instructions read-modify-write the accumulator, so the destination is
// tied to $src1.
let Constraints = "$src1 = $dst" in {
// Defines the three encodings of one IFMA instruction at one vector width:
// register-register (r), register-memory (m), and register-broadcast (mb).
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  // Register form; the trailing 1, 1 flags allow masked/commutable handling
  // by AVX512_maskable_3src.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  // Full-vector memory operand form.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast-from-scalar memory form (EVEX.B set).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (_.BroadcastLdFrag addr:$src3)),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"
6971
// Instantiates an IFMA instruction at all three vector widths. The 512-bit
// form requires only IFMA; the 256-/128-bit forms additionally require VLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6985
// vpmadd52luq/vpmadd52huq: multiply unsigned 52-bit integers and add the
// low/high 52 bits of the product to the 64-bit accumulator.
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
6992
6993//===----------------------------------------------------------------------===//
6994// AVX-512  Scalar convert from sign integer to float/double
6995//===----------------------------------------------------------------------===//
6996
// Scalar integer -> FP conversion (vcvt*si2s{s,d}): codegen-only FRC forms
// (rr/rm, no patterns) plus intrinsic forms (rr_Int/rm_Int) that operate on
// the full XMM register and carry ISel patterns. An AT&T-syntax alias
// without the size suffix is emitted for the register intrinsic form.
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
                    string mem> {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    // Register form on the scalar FP register class; selected via the
    // standalone sint_to_fp/uint_to_fp patterns below, not by a pattern here.
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  // Intrinsic (full-vector) register form.
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                (ins DstVT.RC:$src1, SrcRC:$src2),
                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set DstVT.RC:$dst,
                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  // Intrinsic (full-vector) memory form.
  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                (ins DstVT.RC:$src1, x86memop:$src2),
                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set DstVT.RC:$dst,
                      (OpNode (DstVT.VT DstVT.RC:$src1),
                               (ld_frag addr:$src2)))]>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}
7030
// Rounding-control variant of the scalar int -> FP conversion: register-only
// intrinsic form with an explicit static rounding operand (EVEX.B + RC),
// plus the corresponding AT&T-syntax alias.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 timm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}
7048
// Combines the plain and rounding-control scalar int -> FP variants under a
// single name, with VEX_LIG applied to the whole set.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}
7058
let Predicates = [HasAVX512] in {
// Signed int (32/64-bit) -> float/double. The 32-bit -> f64 case uses
// null_frag (no SDNode pattern) because the conversion is exact and is
// selected via the standalone patterns below instead.
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Suffix-less AT&T aliases for the 32-bit memory intrinsic forms.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

// Scalar sint_to_fp from memory: the tied first operand is irrelevant to the
// result, so IMPLICIT_DEF is used.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// Scalar sint_to_fp from GPR.
def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned int (32/64-bit) -> float/double; same structure as the signed set.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd", "l">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
7138
7139//===----------------------------------------------------------------------===//
7140// AVX-512  Scalar convert from float/double to integer
7141//===----------------------------------------------------------------------===//
7142
// Scalar FP -> integer conversion: register form (rr_Int), register form
// with explicit static rounding control (rrb_Int, EVEX.B + RC), and memory
// form (rm_Int). AT&T-syntax aliases carrying the explicit size suffix
// (aliasStr) are emitted for all three.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Predicates = [HasAVX512]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
}
7173
// Convert float/double to signed/unsigned int 32/64
// Eight instantiations covering {ss,sd} source x {signed,unsigned} x
// {32,64}-bit destination; 64-bit destinations carry VEX_W and the "{q}"
// AT&T suffix.
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7199
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
// Each pattern recognizes "blend the converted scalar into the low element
// of $dst" and selects the _Int form, which already merges into $dst,
// eliminating the separate move. Covers signed/unsigned x GR32/GR64 x
// register/memory sources for both SS and SD.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
7283
// Convert float/double to signed/unsigned int 32/64 with truncation
// Emits both codegen-only forms on the scalar FP register class (rr/rm,
// matching fp_to_sint/fp_to_uint via OpNode) and intrinsic forms on the
// full XMM register (rr_Int/rrb_Int/rm_Int); rrb_Int is the {sae}
// (suppress-all-exceptions) encoding, EVEX.B set. AT&T aliases with the
// explicit size suffix (aliasStr) follow.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512] in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
           EVEX, VEX_LIG, Sched<[sched]>;
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
} //HasAVX512

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
7325
// Truncating conversions: {ss,sd} source x {signed,unsigned} x {32,64}-bit
// destination; 64-bit destinations carry VEX_W and the "{q}" AT&T suffix.
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7351
7352//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
7354//===----------------------------------------------------------------------===//
7355
// Scalar FP -> FP conversion: maskable intrinsic forms on the full XMM
// register (rr_Int/rm_Int, with ISel patterns) plus codegen-only forms on
// the scalar FP register class (rr/rm, no patterns, no side effects).
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
7385
// Scalar Conversion with SAE - suppress all exceptions
// Maskable register-only intrinsic form with the {sae} modifier (EVEX.B).
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
7397
// Scalar Conversion with rounding control (RC)
// Maskable register-only intrinsic form with an explicit static rounding
// operand (EVEX.B + RC).
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// sd -> ss (narrowing): plain + rounding-control variants. Rounding control
// applies because the result can be inexact.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}
7420
// ss -> sd (widening): plain + SAE variants. SAE rather than rounding
// control, since widening never rounds.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Instantiate the scalar f64->f32 (round) and f32->f64 (extend) conversions.
// Both share opcode 0x5A; the mandatory prefix (XD vs XS, applied by the
// multiclasses above) distinguishes them.
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;
7437
// Select plain scalar fpextend/fpround on FR32X/FR64X registers to the
// AVX-512 conversions. The IMPLICIT_DEF fills the pass-through first source
// operand, whose upper elements are irrelevant for scalar FR-register code.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
// Fold the f32 load into the conversion only under OptForSize — presumably
// because the folded form carries a dependency on the undefined $dst
// register; TODO confirm against the equivalent AVX/SSE patterns.
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

// A movss/movsd merging a converted element 0 into an existing vector is
// exactly the semantics of the "_Int" instruction forms, which preserve the
// upper elements of $dst.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
7462
7463//===----------------------------------------------------------------------===//
7464// AVX-512  Vector convert from signed/unsigned integer to float/double
7465//          and from float/double to signed/unsigned integer
7466//===----------------------------------------------------------------------===//
7467
// Common multiclass for packed int<->fp and fp<->fp conversions. Emits the
// register (rr), memory (rm) and broadcast-memory (rmb) forms, each with
// unmasked / merge-masked / zero-masked variants via AVX512_maskable_common.
//   Broadcast - broadcast decorator string (e.g. "{1to2}"); overridable for
//               conversions that read fewer source elements than _Src holds.
//   Alias     - mnemonic suffix ("{x}"/"{y}") for forms that are ambiguous to
//               the assembler; appended only to the memory form.
//   MemOp     - memory operand for the rm form (defaults to the full source
//               vector width).
//   MaskRC    - write-mask register class; overridable when the destination
//               element count differs from the mask width actually used.
//   LdDAG     - pattern for the rm form's load; overridable for conversions
//               that load fewer bytes than a full source vector.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {

  // Register-to-register form.
  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                          OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect MaskRC:$mask,
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched]>;

  // Full-width memory form; uses LdDAG so callers can substitute a narrower
  // load.
  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched.Folded]>;

  // Broadcast (scalar memory, EVEX.b) form.
  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (_Src.BroadcastLdFrag addr:$src))
                            )),
                         (vselect MaskRC:$mask,
                                  (_.VT
                                   (OpNode
                                    (_Src.VT
                                     (_Src.BroadcastLdFrag addr:$src)))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions
// Register-only form; EVEX_B with a register operand selects the {sae}
// encoding rather than broadcast.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}
7526
// Conversion with rounding control (RC)
// Register-only form taking an explicit AVX512RC rounding-mode immediate;
// EVEX_B + EVEX_RC select the static-rounding encoding.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
                         X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7537
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// This lets the rm form match anyextend/extload DAGs directly instead of a
// plain load feeding the OpNode.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
                   MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7548
// Extend Float to Double
// The Z128 form reads only the low 2 of 4 f32 source elements, so it uses
// X86vfpext (a node with that exact semantics), a 64-bit memory operand and
// a "{1to2}" broadcast string; Z256/Z512 can use plain fpextend.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               sched.YMM>, EVEX_V256;
  }
}
7565
// Truncate Double to Float
// The Z128 form uses null_frag: its instruction patterns are disabled and
// instead provided as standalone Pats below (using X86vfpround/X86vmfpround)
// because the v4f32 result only defines 2 elements. The "x"/"y" mnemonic
// suffixes and the InstAliases disambiguate the 128- vs 256-bit memory forms
// for the assembler, which otherwise see the same v4f32 destination.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // AT&T-syntax aliases for the suffixed 128-bit forms (reg, masked reg,
  // zero-masked reg, broadcast mem, masked/zero-masked broadcast mem).
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // AT&T-syntax aliases for the suffixed 256-bit forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7621
// Instantiate packed double<->float conversions (both share opcode 0x5A;
// the operand-size prefix PD vs PS plus VEX_W distinguish them).
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;
7626
// Map v8f64->v8f32 fpround (register, load and broadcast-load sources, each
// unmasked / merge-masked / zero-masked) onto the 512-bit VCVTPD2PSZ forms.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
            (VCVTPD2PSZrr VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
                     VR256X:$src0),
            (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;

  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
                     VR256X:$src0),
            (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PSZrmb addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
                     (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
}
7657
// VLX patterns: v4f64->v4f32 fpround maps onto the 256-bit forms; the
// 128-bit (v2f64) forms use X86vfpround/X86vmfpround because their
// instruction patterns were disabled with null_frag above.
let Predicates = [HasVLX] in {
  def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
            (VCVTPD2PSZ256rr VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
            (VCVTPD2PSZ256rm addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PSZ256rmb addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(X86vfpround (v2f64 VR128X:$src)),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(X86vfpround (loadv2f64 addr:$src)),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
            (VCVTPD2PSZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
7717
// Convert Signed/Unsigned Doubleword to Double
// The Z128 form consumes only 2 of the 4 source i32 elements, so it takes a
// separate node (OpNode128), an i64 memory operand, a "{1to2}" broadcast
// string, and a custom LdDAG that loads just 64 bits.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7737
// Convert Signed/Unsigned Doubleword to Float
// Same-width conversion, so all three vector lengths use plain OpNode; the
// 512-bit form also gets an explicit rounding-control (RC) variant.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7754
// Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversions ignore the rounding mode, so the 512-bit form gets
// an SAE variant (exception suppression) rather than an RC variant.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7771
// Convert Float to Signed/Unsigned Doubleword
// Non-truncating form: rounds per MXCSR, so the 512-bit variant gets an
// explicit rounding-control (RC) form.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7788
// Convert Double to Signed/Unsigned Doubleword with truncation
// Z128 uses null_frag: its narrowing (v2f64 -> 2 defined i32 lanes) patterns
// are provided separately. The InstAliases restore the "x"/"y"-suffixed AT&T
// spellings for the ambiguous 128/256-bit forms.
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7852
// Convert Double to Signed/Unsigned Doubleword
// Like avx512_cvttpd2dq but non-truncating (rounds per MXCSR); the 512-bit
// form therefore gets an RC variant instead of SAE. Z128 instruction
// patterns are disabled with null_frag and provided separately.
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7914
// Convert Double to Signed/Unsigned Quadword
// Requires AVX512DQ; same-width conversion, so all three vector lengths use
// plain OpNode and the 512-bit form gets an RC variant.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7931
// Convert Double to Signed/Unsigned Quadword with truncation
// Requires AVX512DQ; truncation ignores the rounding mode, so the 512-bit
// form gets an SAE variant instead of RC.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7948
// Convert Signed/Unsigned Quadword to Double
// Requires AVX512DQ. The VLX forms have no VEX equivalent, hence
// NotEVEX2VEXConvertible.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
7965
// Convert Float to Signed/Unsigned Quadword
// Requires AVX512DQ. The Z128 widening form reads only 2 of the 4 source f32
// elements, so it uses an f64 memory operand, a "{1to2}" broadcast string,
// and a custom LdDAG that loads just 64 bits.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7988
// Convert Float to Signed/Unsigned Quadword with truncation
// Requires AVX512DQ. Truncating variant of avx512_cvtps2qq: the 512-bit form
// gets SAE instead of RC; the Z128 form likewise loads only 64 bits.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
8010
// Convert Signed/Unsigned Quadword to Float.
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    // Z128 uses null_frag: its selection patterns live elsewhere (see the
    // [HasDQI, HasVLX] X86VMSintToFP/X86VMUintToFP pattern block) so that
    // masking can be expressed.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;
  }

  // AT&T-syntax aliases that accept the explicit "x" suffix on the
  // register and broadcast forms as well (the suffix is only strictly
  // needed to disambiguate the plain memory forms).
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  // Same set of aliases for the 256-bit "y"-suffixed forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}
8077
// Instantiations of the conversion multiclasses above.

// Doubleword integer -> floating point.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                PS, EVEX_CD8<32, CD8VF>;

// Floating point -> doubleword integer, with truncation.
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
                                XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;

// Unsigned doubleword integer -> floating point.
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                 EVEX_CD8<32, CD8VF>;

// Floating point -> doubleword integer, current-rounding-mode forms.
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PS, EVEX_CD8<64, CD8VF>;

// Floating point -> quadword integer (AVX512DQ).
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

// Floating point -> quadword integer with truncation (AVX512DQ).
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

// Quadword integer -> floating point (AVX512DQ).
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                            EVEX_CD8<64, CD8VF>;
8172
// Masked selection patterns for the 128-bit PD->DQ conversions. The Z128
// instructions were defined with null_frag, so all their patterns (plain,
// merge-masked with $src0, and zero-masked) are supplied here, for the
// register, load, and broadcast-load forms.
let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
8290
// Fold a zero-extending scalar 64-bit load (the low two f32 elements, rest
// zeroed) into the 128-bit PS->QQ conversions, including the vselect-based
// merge-masked and zero-masked forms.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect VK2WM:$mask,
                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                            v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
8336
// Without VLX, the 128/256-bit unsigned conversions are not directly
// available: widen the operand into a 512-bit register (upper elements
// undefined via IMPLICIT_DEF), run the ZMM instruction, and extract the
// low subvector of the result.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
8373
// Fold a zero-extending scalar 64-bit load (the low two i32 elements) into
// the 128-bit signed/unsigned DQ->PD conversions, including the
// vselect-based merge-masked and zero-masked forms.
let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                            v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
8397
// Masked selection patterns for the 128-bit QQ->PS conversions, whose Z128
// instructions were defined with null_frag (see avx512_cvtqq2ps): plain,
// merge-masked, and zero-masked forms for register, load, and
// broadcast-load operands.
let Predicates = [HasDQI, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PSZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PSZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
8457
// With DQ but without VLX, the 128/256-bit quadword conversions are not
// directly available: widen the operand into a 512-bit register (upper
// elements undefined via IMPLICIT_DEF), run the ZMM instruction, and
// extract the low subvector of the result.
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
8519
8520//===----------------------------------------------------------------------===//
8521// Half precision conversion instructions
8522//===----------------------------------------------------------------------===//
8523
// Convert packed half precision to packed single precision (VCVTPH2PS).
// Defines masked register-register and register-memory forms.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (ld_frag addr:$src)))>,
                            T8PD, Sched<[sched.Folded]>;
}
8537
// Suppress-all-exceptions ({sae}) register form of VCVTPH2PS (EVEX.b set).
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}
8546
// 512-bit VCVTPH2PS, with the additional {sae} form.
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8552
// 128/256-bit VCVTPH2PS under VLX, plus load-folding patterns for the
// 128-bit form, which only consumes the low 64 bits of its source.
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                       load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                       load, WriteCvtPH2PS>, EVEX, EVEX_V128,
                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load (both the zero-extending
  // and the plain scalar_to_vector form).
  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
8568
// Convert packed single precision to packed half precision (VCVTPS2PH).
// Register destination forms (rr/rrk/rrkz) carry selection patterns; the
// memory destination forms (mr/mrk) are declared pattern-less
// (hasSideEffects = 0, mayStore = 1) for the assembler/disassembler.
// $src2 is the immediate rounding-control byte.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             Sched<[RR]>;
  // Merge-masking form: the pass-through value is tied to the destination.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
}
8605
// VCVTPS2PH with {sae}: assembler-only form (no ISel pattern), hence
// AVX512_maskable_in_asm with an empty pattern list and hasSideEffects = 0.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
8615
// Instantiate VCVTPS2PH and add store-folding patterns for the mr forms.
// NOTE(review): the inner `let Predicates = [HasVLX]` fully overrides the
// outer [HasAVX512] for the 128/256-bit defs (HasVLX implies AVX512 support
// at the subtarget level — confirm against the predicate definitions).
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  // The 128-bit result only occupies 64 bits; match a scalar extract+store
  // of either f64 or i64 flavor onto the 64-bit store form.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}
8643
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;

  // Round-trip f32 -> f16 -> f32: emit the convert pair directly instead of
  // going through a GPR.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
8664
//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Assembler-only (empty pattern, hasSideEffects = 0); EVEX_B encodes {sae}.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                            string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
8673
// {sae} forms of V(U)COMISS/V(U)COMISD. 0x2E = unordered, 0x2F = ordered.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
8684
// EVEX-encoded V(U)COMISS/V(U)COMISD, reusing the SSE sse12_ord_cmp
// multiclasses. The scalar FR32X/FR64X VCOMIS* variants carry no patterns
// (Pattern = []) — X86cmp selection uses the VUCOMIS* forms; the VR128X
// _Int variants are isCodeGenOnly for the intrinsic nodes.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                  "ucomisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let Pattern = []<dag> in {
    defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                   "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                   "comisd", WriteFCom>, PD, EVEX,
                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                          sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                          sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                          sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                          sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
8716
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal/rsqrt approximations: reg-reg and
// reg-mem (intrinsic scalar memory operand) forms, both maskable.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
8734
// Scalar RCP14/RSQRT14 instantiations (ss/sd variants; sd adds VEX_W).
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
8747
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximations: reg, full-vector load, and
// broadcast-load (EVEX_B) forms, all maskable.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (OpNode (_.VT
                            (_.BroadcastLdFrag addr:$src)))>,
                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
8769
// Expand avx512_fp14_p over ps/pd at 512-bit always, and 128/256-bit when
// AVX512VL is available.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                               EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                               EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                               EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                               EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
8793
// Packed RCP14/RSQRT14 over all vector widths.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8796
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision (ERI) approximations: reg-reg, reg-reg with {sae}
// (EVEX_B), and reg-mem forms. Also reused for vgetexp scalars below.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           Sched<[sched]>;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
8821
// Expand avx512_fp28_s over the ss/sd element types.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
8829
// Scalar ERI instantiations (RCP28/RSQRT28 require HasERI); VGETEXP scalar
// reuses the same multiclass and is available with plain AVX512.
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8839/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8840
// Packed 28-bit-precision (ERI) approximations: reg, load, and broadcast
// forms. The {sae} variant lives in avx512_fp28_p_sae below.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                                  (_.BroadcastLdFrag addr:$src)))>,
                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// {sae} register-only form of the packed ERI approximations (EVEX_B).
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (OpNode (_.VT _.RC:$src))>,
                        EVEX_B, Sched<[sched]>;
}
8872
// Expand the packed ERI forms (normal + {sae}) over ps/pd at 512-bit.
multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
8882
// 128/256-bit expansion of the packed unary ops (used for vgetexp), gated
// on AVX512VL. No {sae} variants at these widths.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
8901
// Packed ERI instantiations (512-bit only, HasERI). VGETEXP additionally
// gets the 128/256-bit VL forms via avx512_fp_unaryop_packed.
let Predicates = [HasERI] in {
 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                            SchedWriteFRsqrt>, EVEX;
 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                            SchedWriteFRcp>, EVEX;
 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                            SchedWriteFAdd>, EVEX;
}
defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                            SchedWriteFRnd>,
                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                          SchedWriteFRnd>, EVEX;
8914
// Packed VSQRT with explicit static rounding control ($rc); register-only,
// EVEX_B + EVEX_RC encode the embedded rounding mode.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
8923
// Packed VSQRT (MXCSR rounding): reg, full-vector load, and broadcast-load
// (EVEX_B) forms, all maskable; selected from the generic fsqrt node.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (fsqrt (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (fsqrt (_.VT
                            (_.BroadcastLdFrag addr:$src)))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
8944
// Expand packed VSQRT over ps/pd: 512-bit always, 128/256-bit under VL.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
8969
// Embedded-rounding VSQRT forms: 512-bit only (EVEX rounding control is a
// ZMM-encoding feature).
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
8979
// Scalar VSQRT: intrinsic (_Int) reg/mem forms, an embedded-rounding (rb)
// form, plus pattern-less isCodeGenOnly FRC forms used to select the plain
// scalar fsqrt node via the Pat<>s at the end.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    _.ScalarIntMemCPat:$src2)>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Static rounding-control variant ($rc), register-only.
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 timm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  // Scalar fsqrt: $src1 (the merge source) is unused, so feed IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Fold the load only when optimizing for size (avoids a partial-register
  // style dependency trade-off; see the analogous SSE patterns).
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}
9028
// Expand scalar VSQRT over ss/sd.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
9036
// VSQRT instantiations: packed (plus rounding forms) and scalar.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9041
// Scalar VRNDSCALE: intrinsic reg/{sae}/mem forms with an immediate
// rounding-operation operand ($src3), plus pattern-less isCodeGenOnly FRC
// forms selected from X86VRndScale via the trailing Pat<>s.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 timm:$src3)))>,
                           Sched<[sched]>;

  // {sae} variant (EVEX_B): suppress exceptions while rounding.
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>, Sched<[sched]>;

    let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }

  // Scalar round: merge source is unused, so feed IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, timm:$src2))>;
  }

  // Load folding restricted to -Os/-Oz, mirroring the sqrt patterns above.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, timm:$src2))>;
  }
}
9093
// Scalar VRNDSCALE instantiations (ss = 0x0A, sd = 0x0B).
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;
9103
// Match a masked scalar op folded into a scalar move (merge and zero
// flavors) onto the corresponding _Intk/_Intkz instruction. Mask/OutMask
// are passed as dags so callers can adapt the mask representation.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masked: falls back to the destination's low element.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    // Zero-masked: falls back to +0.0.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
9121
// Masked scalar sqrt patterns: GR32 mask truncated to i8 then moved to VK1WM.
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9128
9129
9130//-------------------------------------------------
9131// Integer truncate and extend operations
9132//-------------------------------------------------
9133
// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect node:$mask,
                                    (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect node:$mask,
                                     (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncus node:$src), node:$src0)>;
9146
// Register and store forms of a VPMOV* down-convert (truncate) at one vector
// width.  OpNode matches the unmasked form, MaskNode the merge/zero-masked
// forms.  The store forms (mr/mrk) carry no patterns here; their selection
// patterns are supplied separately by avx512_trunc_mr_lowering.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDPatternOperator MaskNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  // Unmasked register form.
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  // Merge-masked register form: $src0 supplies the pass-through lanes and is
  // tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  // Zero-masked register form: masked-off lanes become zero.
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  // Truncating-store forms: no ISel patterns attached here (see
  // avx512_trunc_mr_lowering for the store patterns).
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}
9188
// Selection patterns mapping (masked) truncating-store fragments onto the
// pattern-less mr/mrk store forms defined by avx512_trunc_common.  `Name` is
// the defm prefix (NAME of the instantiating multiclass).
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  // Unmasked truncating store -> mr form.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                                    addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store -> mrk form.
  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
9203
// Instantiate a VPMOV* truncate at all three vector widths.  Separate
// OpNode/MaskNode parameters per width let the caller use the "in-vector"
// node for widths whose result is narrower than a full 128-bit register.
// Z128/Z256 require VLX in addition to `prd`; Z requires only `prd`.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                             truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
9234
// qword -> byte truncates.  Even the 512-bit source yields only 8 bytes of
// result, so the in-vector node is used at every width; the plain OpNode
// parameter is accepted for interface symmetry with the other widths but is
// not referenced here.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
9246
// qword -> word truncates.  Z128/Z256 results are sub-128-bit, so they use
// the in-vector node; only the 512-bit form uses the plain OpNode/MaskNode.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
9258
// qword -> dword truncates.  Only the 128-bit form produces a sub-128-bit
// result, so only it uses the in-vector node.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
9270
// dword -> byte truncates.  Z128/Z256 results are sub-128-bit and use the
// in-vector node; the 512-bit form uses the plain OpNode/MaskNode.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
9282
// dword -> word truncates.  Only the 128-bit form produces a sub-128-bit
// result, so only it uses the in-vector node.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
9294
// word -> byte truncates.  Requires BWI (word-element ops); only the 128-bit
// form produces a sub-128-bit result and uses the in-vector node.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
9306
// Instantiate each VPMOV down-convert in three flavors: plain truncate
// (trunc / X86vtrunc), signed saturating (X86vtruncs) and unsigned
// saturating (X86vtruncus).

// qword -> byte
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// qword -> word
defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

// qword -> dword
defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi32,
                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi32,
                                  masked_truncstore_s_vi32, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32,
                                  X86vtruncus, X86vmtruncus>;

// dword -> byte
defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// dword -> word
defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

// word -> byte (BWI only)
defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc,
                                  X86vmtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;
9379
// Without VLX the 256-bit truncate instructions are unavailable: widen the
// source into a zmm (via INSERT_SUBREG of IMPLICIT_DEF), run the 512-bit
// VPMOVDW/VPMOVQD, and extract the low xmm of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
9390
// Same widening trick for the BWI word->byte truncate when VLX is missing.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
9396
// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Map the masked-truncate nodes onto the merge-masked (rrk) and zero-masked
// (rrkz) register forms of the named instruction.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  // Merge-masked: pass-through value in $src0 -> rrk.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  // Zero-masked: pass-through is all-zeros -> rrkz.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}
9414
// 256-bit masked dword->word truncate lowerings (VLX).
let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}
9420
// 512-bit masked truncate lowerings (dword->word, dword->byte, qword->word).
let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
9434
// Maskable register (rr) and load (rm) forms shared by the VPMOVSX/VPMOVZX
// extend instructions at one vector width.  The rm pattern matches the
// extending-load fragment directly rather than an extend of a plain load.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}
9450
// byte -> word extends (VPMOVSXBW/VPMOVZXBW).  The 128-bit form consumes only
// the low half of an xmm, so it uses InVecNode; Z256/Z use the plain OpNode.
// ExtTy selects the sign/zero extending-load PatFrag ("s"/"z").
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9469
// byte -> dword extends (VPMOVSXBD/VPMOVZXBD).  Z128 and Z256 both consume a
// sub-register-sized slice of an xmm source, so both use InVecNode; only the
// 512-bit form consumes the whole xmm and uses OpNode.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9488
// byte -> qword extends (VPMOVSXBQ/VPMOVZXBQ).  Every width consumes only a
// slice of the xmm source (2/4/8 bytes), so InVecNode is used throughout and
// the OpNode parameter is unreferenced.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9507
// word -> dword extends (VPMOVSXWD/VPMOVZXWD).  Only the 128-bit form uses a
// half-register source slice (InVecNode); Z256/Z use the plain OpNode.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9526
// word -> qword extends (VPMOVSXWQ/VPMOVZXWQ).  Z128/Z256 consume a slice of
// the xmm source (InVecNode); only the 512-bit form uses OpNode.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9545
// dword -> qword extends (VPMOVSXDQ/VPMOVZXDQ).  Only the 128-bit form uses a
// half-register source slice (InVecNode).  No VEX_WIG here, unlike the
// byte/word variants above.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
9565
// Zero extends ("z" selects the zextload fragments).
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

// Sign extends ("s" selects the sextload fragments).
defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9579
9580
// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
// These fold a full-width-load extend (ExtOp of a plain vector load) into the
// rm form of the matching instruction.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
9616
// Extends the base patterns with partial-load folds: the source data arrives
// via scalar_to_vector of a narrow scalar load, an extloadi32i16, or a
// zero-extending vector load (X86vzload*), all matched through InVecOp since
// only part of the source register is consumed.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns whose sources are still sub-xmm-sized loads.
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}
9679
// Instantiate the load-folding patterns for both extend families.
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9682
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.  Instead go through v16i32: zero-extend words to dwords,
// then use the dword->byte down-convert.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
9692
9693//===----------------------------------------------------------------------===//
9694// GATHER - SCATTER Operations
9695
9696// FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction.  $dst is earlyclobber and tied to $src1
// (the pass-through vector); the mask register is both consumed and written
// back ($mask = $mask_wb), matching hardware behavior where completed lanes
// clear their mask bit.  The memory operand is matched via vectoraddr.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
9711
// Gathers with 64-bit data elements (the pd/q families): a 512-bit form
// plus, under HasVLX, 256- and 128-bit forms, each in both dword-indexed
// (D, opcode dopc) and qword-indexed (Q, opcode qopc) variants. All carry
// VEX_W since the element size is 64 bits.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
9729
// Gathers with 32-bit data elements (the ps/d families). The qword-indexed
// (Q) forms use a result info one step narrower than the encoding width
// (e.g. _.info256 result with an EVEX_V512 encoding), because each 64-bit
// index gathers only one 32-bit element; the 128-bit Q form additionally
// overrides the mask class to VK2WM to match its two result elements.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9748
9749
// Floating-point gathers (VGATHERDPD/QPD/DPS/QPS) and integer gathers
// (VPGATHERDQ/QQ/DD/QD) instantiated from the two element-width multiclasses.
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9755
// Emits the memory-destination ("mr") form of a masked scatter for the
// vector type _. The only register output is the updated mask ($mask_wb,
// tied to $mask); the element stores are modeled with mayStore = 1.
// MaskRC defaults to the writemask class of _ and is overridable just like
// in avx512_gather.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
9771
// Scatters with 64-bit data elements (pd/q): mirrors avx512_gather_q_pd —
// 512-bit forms plus VLX-gated 256/128-bit forms, in dword-indexed (D) and
// qword-indexed (Q) variants, all with VEX_W.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
9789
// Scatters with 32-bit data elements (ps/d): mirrors avx512_gather_d_ps.
// The qword-indexed (Q) forms store a source vector one step narrower than
// the encoding width, since each 64-bit index addresses one 32-bit element;
// the 128-bit Q form overrides the mask class to VK2WM.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mscatterv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9808
// Floating-point scatters (VSCATTERDPD/QPD/DPS/QPS) and integer scatters
// (VPSCATTERDQ/QQ/DD/QD).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9814
// prefetch
// Gather/scatter prefetch hint: takes a writemask and a vector memory
// operand, produces no register results and has no ISel pattern. Both
// mayLoad and mayStore are set conservatively; requires the PFI feature.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}
9823
// VGATHERPF0/1 and VSCATTERPF0/1 prefetch instructions. The opcode selects
// dword (0xC6) vs qword (0xC7) indexing; the ModRM reg field picks the hint:
// MRM1m = gather T0, MRM2m = gather T1, MRM5m = scatter T0, MRM6m = scatter T1.
// The qword-indexed PS forms use a 256-bit address operand and VK8WM since
// eight 64-bit indices cover only eight 32-bit elements.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9871
// Mask-to-vector move (vpmovm2*): sign-extends each mask bit into a full
// vector element (all-ones or all-zeros). Also adds a pattern mapping
// anyext of the mask to the same instruction, since the extended garbage
// bits don't matter for anyext.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?

// Also need a pattern for anyextend.
def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
          (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}
9882
// Instantiates cvt_by_vec_width at all three vector widths: the 512-bit
// form under the base predicate prd, and the 256/128-bit forms additionally
// gated on HasVLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
9893
// vpmovm2{b,w} require BWI; vpmovm2{d,q} require DQI. The word/quadword
// element sizes are selected with VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9898
// Vector-to-mask move (vpmov*2m): the selection pattern is a signed compare
// of zero against each element (X86pcmpgtm 0, x), i.e. the mask bit is set
// when the element is negative — it extracts the element sign bits.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
9905
9906// Use 512bit version to implement 128/256 bit in case NoVLX.
// NoVLX fallback: widen the 128/256-bit source into a 512-bit register
// (INSERT_SUBREG into an IMPLICIT_DEF), run the 512-bit "Zrr" instruction,
// and copy the result into the narrower mask class. Only the low _.NumElts
// mask bits are meaningful; the bits produced from the undefined upper
// elements fall outside the narrow mask type.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
9918
// Instantiates the vector-to-mask instruction at all widths: real 256/128
// encodings under [prd, HasVLX], and pattern-only widening fallbacks
// (the *_Alt defm names emit only Pats) under [prd, NoVLX].
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
9936
// vpmov{b,w}2m require BWI; vpmov{d,q}2m require DQI. VEX_W selects the
// word/quadword element sizes.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
9945
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Go through v16i32: expand the mask with vpmovm2d, then truncate the
// dwords down to bytes/words. anyext reuses the same sequence since any
// value is acceptable in the extended bits.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;

  def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
9960
// Same mask-extension workaround for the 256-bit case (v8i1 -> v8i16)
// using the VLX encodings of vpmovm2d / vpmovdw.
let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;

  def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
9968
9969//===----------------------------------------------------------------------===//
9970// AVX-512 - COMPRESS and EXPAND
9971//
9972
// Register (rr) and store (mr/mrk) forms of a compress instruction. None of
// the forms carries an ISel pattern here (rr uses null_frag); selection is
// done by the explicit Pats in compress_by_vec_width_lowering. The plain mr
// store form is marked mayStore with hasSideEffects = 0; mrk is the masked
// store form.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}
9994
// Selection patterns for the pattern-less compress forms: a masked
// compressing store maps to mrk, and the X86compress node maps to the
// merge-masked (rrk) or zero-masked (rrkz) register form depending on
// whether the pass-through is a register or all-zeros.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}
10007
// Instantiates compress instructions and their lowering patterns at all
// three widths; 256/128-bit forms are additionally gated on HasVLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
10023
10024// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer (VPCOMPRESSD/Q) and FP (VCOMPRESSPS/PD) compress instructions;
// VEX_W selects the 64-bit element forms.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
10033
// expand
// Register (rr) and load (rm) forms of an expand instruction. Like the
// compress forms, neither carries an ISel pattern (null_frag); selection is
// done by the Pats in expand_by_vec_width_lowering.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10048
// Selection patterns for the pattern-less expand forms. An expanding load
// with an undef or all-zeros pass-through maps to the zero-masked load
// (rmkz); with a register pass-through it maps to the merge-masked load
// (rmk). The X86expand node maps to the rrk/rrkz register forms.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}
10071
// Instantiates expand instructions and their lowering patterns at all three
// widths; 256/128-bit forms are additionally gated on HasVLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
10087
// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer (VPEXPANDD/Q) and FP (VEXPANDPS/PD) expand instructions; VEX_W
// selects the 64-bit element forms.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
10097
10098//handle instruction  reg_vec1 = op(reg_vec,imm)
10099//                               op(mem_vec,imm)
10100//                               op(broadcast(eltVt),imm)
10101//all instruction created with FROUND_CURRENT
// Unary packed FP op with an 8-bit immediate, in three addressing forms:
// register (rri), full-vector memory (rmi), and element broadcast (rmbi,
// EVEX_B). All are maskable; the immediate is passed to OpNode as i32.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10125
10126//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE (suppress-all-exceptions) register-only variant of the unary packed
// FP-with-immediate op above; EVEX_B here encodes {sae} rather than a
// broadcast.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}
10139
// Instantiates the unary packed FP-with-immediate op at all widths. Only
// the 512-bit form gets the additional SAE variant (using OpNodeSAE); the
// VLX-gated 128/256-bit forms have the regular variant only.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
10156
10157//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10158//                               op(reg_vec2,mem_vec,imm)
10159//                               op(reg_vec2,broadcast(eltVt),imm)
10160//all instruction created with FROUND_CURRENT
// Two-source packed FP op with an 8-bit immediate: register (rri),
// full-vector memory (rmi), and broadcast (rmbi, EVEX_B) forms, all
// maskable. The immediate reaches OpNode as i32.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10188
10189//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10190//                               op(reg_vec2,mem_vec,imm)
// Two-source op with an 8-bit immediate where the destination vector type
// (DestInfo) may differ from the source type (SrcInfo). Register (rri) and
// memory (rmi) forms; the immediate reaches OpNode as i8.
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10212
10213//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10214//                               op(reg_vec2,mem_vec,imm)
10215//                               op(reg_vec2,broadcast(eltVt),imm)
// Same-type specialization of avx512_3Op_rm_imm8 (inherits its rri/rmi
// forms with DestInfo == SrcInfo == _) and adds the broadcast-from-memory
// form (rmbi, EVEX_B).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i8 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10230
//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                      op(reg_vec2,mem_scalar,imm)
// Scalar FP op with an 8-bit immediate: register (rri) and scalar-memory
// (rmi) forms using the scalar intrinsic memory pattern fragment.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.ScalarIntMemCPat:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10252
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE register-only variant of the two-source packed FP-with-immediate op;
// EVEX_B encodes {sae} in this register form.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}
10267
// Scalar FP instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
// NOTE(review): unlike the packed variant above, the record name is spelled
// with an explicit NAME# prefix — presumably equivalent; verify generated
// record names if modifying.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}
10281
// Instantiate a packed FP-with-imm op at all vector widths.  The 512-bit
// (Z) form also gets the {sae} variant; the 128/256-bit forms (VLX) get
// only the non-{sae} variant.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}
10298
// Instantiate a three-operand op with an 8-bit immediate at all widths,
// allowing the destination and source element types to differ (DestInfo
// vs. SrcInfo, e.g. vdbpsadbw: i16 result from i8 sources).
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
10313
// Instantiate a three-operand op with an 8-bit immediate at all widths,
// with a single element type shared by sources and destination.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                                EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}
10328
// Instantiate the scalar FP-with-imm op (plus its {sae} variant).  Scalar
// ops only have one width, hence the single Z instantiation using the
// XMM scheduling class.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}
10337
// Instantiate PS (f32) and PD (f64) flavors of a unary packed FP-with-imm
// op; the two flavors may use distinct opcodes.  PD adds VEX_W and 64-bit
// compressed-displacement scaling.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
10348
// Unary packed FP ops with an immediate control byte.
// VRNDSCALE uses distinct PS/PD opcodes (0x08/0x09); the others share one.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
10358
// Packed VRANGE (DQI) and the scalar forms of VRANGE/VREDUCE/VGETMANT.
// Scalar forms are VEX_LIG (length-ignored) with CD8VT1 disp8 scaling.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10388
// 128-bit-lane shuffle (VSHUFF/I32X4, VSHUFF/I64X2).  The shuffle itself is
// expressed in CastInfo's element type and bitconverted to the result type
// _, so e.g. vshuff32x4 can reuse the f64 shuffle node.  EVEX2VEXOvrd names
// the VEX instruction (VPERM2F128/VPERM2I128 + form suffix) used when the
// EVEX->VEX pass can narrow the encoding; the broadcast form has no VEX
// counterpart and gets no override.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Broadcast-memory form (EVEX_B): second operand is a broadcast scalar.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10425
// Instantiate the 128-bit-lane shuffle at 512 and 256 bits (a 128-bit form
// would be meaningless — there is only one lane).  The 512-bit form passes
// an empty EVEX2VEX override name since it has no VEX equivalent.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}
10439
// vshuff32x4/vshuff64x2/vshufi32x4/vshufi64x2.  The 32-bit-element forms
// reuse the 64-bit cast info so the shuffle node always sees 64-bit lanes.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10448
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
// Each pattern widens the 128-bit source into a 512-bit register and
// shuffles with immediate 0, which selects lane 0 everywhere and thus
// replicates the 128-bit subvector across all lanes.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

// i16/i8 element types reuse the i32 shuffle; lane granularity is 128 bits
// so the element type does not matter.
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
10481
// VALIGND/VALIGNQ: concatenate src2:src1 and shift right by imm elements.
// rri/rmi carry EVEX2VEX overrides to VPALIGNR forms for narrowing.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  // Broadcast-memory form; no VEX counterpart, so no override.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10511
// Instantiate VALIGN at all three widths (opcode 0x03 for all).
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10527
// valignd/valignq (dword/qword element alignment) and byte-granular
// vpalignr (per-128-bit-lane, BWI).
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10537
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.  Each XForm rescales the shift count when reinterpreting
// the element size: qword->dword is x2, qword->byte is x8, dword->byte x4.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
10549
// Lower a masked align expressed in one element type (From) to the masked
// instruction of another element type (To): the vselect's mask matches To's
// granularity, the align node is bitconverted from From, and the immediate
// is rescaled with ImmXForm.  Covers merge (rrik/rmik) and zeroing
// (rrikz/rmikz) forms, with and without a folded load.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // Merge-masked, register form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              timm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  // Zero-masked, register form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              timm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  // Merge-masked, folded-load form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                      timm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  // Zero-masked, folded-load form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                      timm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}
10591
// Extends avx512_vpalign_mask_lowering with broadcast-load (rmbi*) forms:
// the From-typed align node consumes a To-typed broadcast load.
// NOTE(review): the first (unmasked) pattern matches From.RC:$src1 but
// emits To.RC:$src1 — presumably From and To share the same register class
// here (both full-width vector RCs); verify if instantiating with others.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked broadcast-load form.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  // Merge-masked broadcast-load form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (To.BroadcastLdFrag addr:$src2))),
                                      timm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  // Zero-masked broadcast-load form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (To.BroadcastLdFrag addr:$src2))),
                                      timm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}
10625
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd (imm rescaled x2 by
  // ValignqImm32XForm).
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}
10632
let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}
10643
let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // Imms are rescaled to byte counts (x8 for qwords, x4 for dwords).
  // Non-_mb variant: VPALIGNR has no broadcast form.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
10651
// vdbpsadbw: double-block packed SAD; i16 results from i8 sources, so no
// EVEX->VEX narrowing is possible.
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10655
// Unary packed op: dst = op(src), maskable, with register (rr) and
// full-vector memory (rm) forms.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}
10673
// avx512_unary_rm plus a broadcast-memory (rmb, EVEX_B) form for ops whose
// element types support embedded broadcast (32/64-bit).
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
10685
// Instantiate a unary op (no broadcast form) at 512/256/128 bits; the
// narrow widths additionally require VLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                              EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                              EVEX_V128;
  }
}
10700
// Instantiate a unary op with broadcast form at 512/256/128 bits.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                              EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}
10715
// D (i32) and Q (i64) flavors; both get broadcast forms, Q adds VEX_W.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
10724
// B (i8) and W (i16) flavors; no broadcast forms (byte/word elements do not
// support embedded broadcast), W bit ignored (VEX_WIG).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
10733
// All four element-size flavors: D/Q gated on AVX512, B/W gated on BWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}
10743
// vpabsb/w/d/q — packed absolute value for all element sizes.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;
10746
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the operand into a ZMM register, run the 512-bit vpabsq, then
// extract the original-width subregister.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
10760
// Use 512bit version to implement 128/256 bit.
// Generic form of the VPABS NoVLX trick above: widen to 512 bits, run the
// Z-suffixed instruction, extract the narrow subregister back out.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit via 512-bit.
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    // 128-bit via 512-bit.
    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}
10782
// CDI: vplzcnt (leading-zero count) and vpconflict, D/Q flavors.
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10793
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// Use the 512-bit form for 128/256 bit when VLX is unavailable.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10804
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// f32 duplicate ops (movshdup/movsldup) at all widths; XS prefix.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                      avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;
10819
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit movddup is special-cased: the wider forms use X86Movddup, but at
// 128 bits the operation is a plain broadcast of element 0 (register form)
// or of a 64-bit scalar load (memory form, CD8VH half-vector disp8 scale).
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}
10838
// VMOVDDUP over the three EVEX vector widths.  The 128-bit form needs the
// dedicated avx512_movddup_128 multiclass because vmovddup xmm behaves as a
// broadcast of the low double rather than a plain unary shuffle.
//
// Fix: the OpNode parameter was accepted but ignored - the body hard-coded
// X86Movddup.  Pass OpNode through instead; behavior is unchanged for the
// existing instantiation, which supplies X86Movddup.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10851
// Top-level VMOVDDUP wrapper: f64 vectors, XD prefix, REX.W set.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                        avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10859
// Select VMOVDDUPZ128 for various v2f64 broadcast forms, including the
// masked (rrk/rmk) and zero-masked (rrkz/rmkz) variants via vselect.
let Predicates = [HasVLX] in {
// Unmasked: broadcast from a scalar register, a full-vector load, or a
// zero-extending 64-bit load.
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;

// Register-source broadcast under a mask (merge and zeroing forms).
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

// Scalar broadcast-load under a mask.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

// Full-vector load feeding a broadcast, under a mask.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
10890
10891//===----------------------------------------------------------------------===//
10892// AVX-512 - Unpack Instructions
10893//===----------------------------------------------------------------------===//
10894
// FP unpack high/low.  VUNPCKH passes IsCommutable=0, IsKCommutable=1 (the
// trailing 0, 1 arguments); VUNPCKL uses the defaults.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;

// Byte/word unpacks require BWI; dword/qword unpacks only need AVX512F.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
10917
10918//===----------------------------------------------------------------------===//
10919// AVX-512 - Extract & Insert Integer Instructions
10920//===----------------------------------------------------------------------===//
10921
// Memory-destination form of VPEXTRB/VPEXTRW: extract element $src2 from
// $src1, truncate it to the element width, and store it to $dst.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
10931
// VPEXTRB (BWI): register form (opcode 0x14, TAPD) plus the memory-store
// form from avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
10944
// VPEXTRW (BWI).  The primary register form uses the legacy 0xC5/PD
// encoding; rr_REV is the codegen-only 0x15/TAPD encoding kept for the
// disassembler, linked to the primary form via FoldGenData.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
10964
// VPEXTRD/VPEXTRQ (DQI): extract a dword/qword element to a GPR or to
// memory; both forms use opcode 0x16 with TAPD.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                            RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}

// VPEXTRD and VPEXTRQ share the encoding; VEX_W selects the qword form.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10989
// Memory-source form shared by the VPINSR* instructions: insert a scalar
// loaded via LdFrag into element $src3 of $src1.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
10999
// VPINSRB/VPINSRW (BWI): insert from a GPR (register form) or from memory
// via avx512_insert_elt_m.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
11013
// VPINSRD/VPINSRQ (DQI): insert a GPR or a loaded scalar into element
// $src3; both register and memory forms are TAPD-encoded.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
// VPINSRD and VPINSRQ share opcode 0x22; VEX_W selects the qword form.
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11035
11036//===----------------------------------------------------------------------===//
11037// VSHUFPS - VSHUFPD Operations
11038//===----------------------------------------------------------------------===//
11039
// VSHUFPS/VSHUFPD via the common 3-op + imm8 machinery (opcode 0xC6).
// NOTE(review): the VTInfo_I parameter is currently unused - only VTInfo_FP
// drives the definitions; kept for interface compatibility with callers.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11050
11051//===----------------------------------------------------------------------===//
11052// AVX-512 - Byte shift Left/Right
11053//===----------------------------------------------------------------------===//
11054
11055// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Whole-register byte shift (PSLLDQ/PSRLDQ style) by an immediate; the
// register/memory encodings are selected by the MRMr/MRMm Format arguments
// since these use opcode-extension encodings (/3, /7).
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11072
// Instantiate the byte-shift multiclass for all three vector widths; the
// 256/128-bit forms additionally require VLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
// Both use opcode 0x73; the /7 vs /3 ModRM.reg extension distinguishes
// shift-left from shift-right.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11092
// VPSADBW: note the destination and source use different VT infos (i64
// result elements from i8 sources), hence separate _dst/_src parameters.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11113
// VPSADBW across all vector widths: i8 sources, i64 accumulator elements.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11130
11131// Transforms to swizzle an immediate to enable better matching when
11132// memory operand isn't in the right place.
// Operand numbering in these transforms is 0-based: operand 0/1/2 are the
// instruction's $src1/$src2/$src3.  Imm bit index (a<<2)|(b<<1)|c selects
// the result for input bits a=src1, b=src2, c=src3.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1
  // (0-based, i.e. $src1 and $src2 - matching the convention used by
  // VPTERNLOG321_imm8 above).
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end (0-based:
  // operand order becomes src2, src3, src1).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning
  // (0-based: operand order becomes src3, src1, src2).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
11192
// VPTERNLOGD/Q: bitwise ternary logic of three vector operands, selected by
// the imm8 $src4 truth table.  The Name parameter is the instruction base
// name, used below to !cast the concrete instructions when attaching extra
// patterns; the imm8 is remapped with the VPTERNLOG*_imm8 transforms so a
// commuted/memory-operand match still encodes the same logical function.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 timm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (_.BroadcastLdFrag addr:$src3)),
                            (i8 timm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}
11360
// Instantiate VPTERNLOG (opcode 0x25) for all vector widths; NAME is passed
// through so avx512_ternlog can !cast the concrete instruction names.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
11378
// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
// VPTERNLOG is a pure bitwise operation, so the QWORD-element instruction
// (VPTERNLOGQ*) can implement the byte/word element types as well.  For the
// load forms, the memory operand must end up as the instruction's third
// source; when the DAG has the load in a different position the register
// operands are commuted into place and the control immediate is remapped to
// match (VPTERNLOG321_imm8 / VPTERNLOG132_imm8 name the source permutation
// applied to the immediate).
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv16i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
                                 (loadv8i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
                                 VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
                                 VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                 (loadv32i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
                                 VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
                                 VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
                                  (loadv16i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
                                  VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
                                  VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                               (VPTERNLOG132_imm8 timm:$src4))>;
}
11449
// 512-bit counterparts of the vXi8/vXi16 VPTERNLOG patterns above.  The
// bitwise VPTERNLOGQ instruction is reused for byte/word element types;
// commuted-load patterns remap the control immediate with
// VPTERNLOG321_imm8 / VPTERNLOG132_imm8.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                 (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
                                 (loadv64i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
                                 VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
                                 VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;

  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                  (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
                                  (loadv32i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
                                  VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
                                  VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                            (VPTERNLOG132_imm8 timm:$src4))>;
}
11485
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
// All element widths select the same bitwise VPTERNLOGQ instruction; the
// vector type in the pattern only matters for matching the DAG.
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
11502
// Without VLX only the 512-bit VPTERNLOG exists, so the 128/256-bit NOT is
// done by widening: insert the source into the low subregister of an
// otherwise-undefined ZMM (INSERT_SUBREG of IMPLICIT_DEF), run the 512-bit
// instruction, and extract the original subregister.  Since the operation
// is bitwise, the upper (undefined) bits cannot affect the extracted part.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}
11562
// With VLX the native 128/256-bit VPTERNLOGQ forms are available, so NOT
// needs no widening.  These take priority over the NoVLX patterns above.
let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
11582
11583//===----------------------------------------------------------------------===//
11584// AVX-512 - FixupImm
11585//===----------------------------------------------------------------------===//
11586
// Packed VFIXUPIMM forms.  $src1 is tied to $dst and is also a data input to
// the X86VFixupimm node.  The third source is the integer "table" operand and
// uses TblVT rather than the fp type info _:
//   rri  - table in a register
//   rmi  - full-vector load of the table
//   rmbi - broadcast load of a single table element (EVEX_B)
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                    (i32 timm:$src4))>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}
11617
// Extends the packed forms above with the {sae} register variant (rrib),
// selected from the separate X86VFixupimmSAE node.  Only instantiated at
// 512 bits (see avx512_fixupimm_packed_all below).
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}
11634
// Scalar VFIXUPIMM (SS/SD) forms: register (rri), register with {sae}
// (rrib), and scalar-memory (rmi).  $src1 is tied to $dst; the third source
// uses the integer type info _src3VT.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>;
    // NOTE(review): rrib is a register form (MRMSrcReg) yet uses the
    // folded-load scheduling class; the packed SAE form above uses
    // Sched<[sched]> -- confirm whether this is intentional.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (X86VFixupimms (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (_src3VT.VT (scalar_to_vector
                                              (_src3VT.ScalarLdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11667
// Instantiates the packed forms across vector widths: 512-bit (with the
// {sae} variant) under AVX512F, and 128/256-bit under AVX512VL.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}
11684
// VFIXUPIMM instantiations: scalar SS/SD and packed PS/PD.  The table
// operand always uses the matching-width integer VT info.
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11695
11696// Patterns used to select SSE scalar fp arithmetic instructions from
11697// either:
11698//
11699// (1) a scalar fp operation followed by a blend
11700//
11701// The effect is that the backend no longer emits unnecessary vector
11702// insert instructions immediately after SSE scalar fp instructions
11703// like addss or mulss.
11704//
11705// For example, given the following code:
11706//   __m128 foo(__m128 A, __m128 B) {
11707//     A[0] += B[0];
11708//     return A;
11709//   }
11710//
11711// Previously we generated:
11712//   addss %xmm0, %xmm1
11713//   movss %xmm1, %xmm0
11714//
11715// We now generate:
11716//   addss %xmm1, %xmm0
11717//
11718// (2) a vector packed single/double fp operation followed by a vector insert
11719//
11720// The effect is that the backend converts the packed fp instruction
11721// followed by a vector insert into a single SSE scalar fp instruction.
11722//
11723// For example, given the following code:
11724//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
11727//   }
11728//
11729// Previously we generated:
11730//   addps %xmm0, %xmm1
11731//   movss %xmm1, %xmm0
11732//
11733// We now generate:
11734//   addss %xmm1, %xmm0
11735
11736// TODO: Some canonicalization in lowering would simplify the number of
11737// patterns we have to try to match.
// Selects the EVEX scalar fp arithmetic instruction (V<Op>Zrr_Int and
// friends) for a scalar op + movss/movsd merge, including the masked and
// zero-masked X86selects forms.  Consistency fix: the zero-masked patterns
// used !cast<I> (and odd indentation) where every sibling pattern uses
// !cast<Instruction>; they now all agree.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss/movsd
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss/movsd
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted zero-masked scalar math op with insert via movss/movsd
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intkz)
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}
11798
// Instantiate the scalar math patterns for each SS/SD arithmetic op.
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11808
// Same idea for a unary scalar op (e.g. sqrt) merged into $dst via
// movss/movsd: select the EVEX V<Op>Zr_Int instruction directly.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}
11817
// Unary instantiations: scalar square root.
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11820
11821//===----------------------------------------------------------------------===//
11822// AES instructions
11823//===----------------------------------------------------------------------===//
11824
// EVEX-encoded vector AES: reuses AESI_binop_rm_int with the corresponding
// intrinsic per width (base name for 128, "_256"/"_512" suffixes otherwise).
// 128/256-bit forms need VAES+VLX; the 512-bit form needs VAES with AVX512F.
// Formatting fix only: the closing brace and the second `let` were indented
// as if nested inside the first predicate block.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
11842
// VAES instruction instantiations (enc/dec, normal and last round).
defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11847
11848//===----------------------------------------------------------------------===//
11849// PCLMUL instructions - Carry less multiplication
11850//===----------------------------------------------------------------------===//
11851
// EVEX carry-less multiply: 512-bit form needs VPCLMULQDQ with AVX512F;
// the 128/256-bit forms additionally need VLX.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}
11864
// Aliases (mnemonic spellings with an explicit immediate operand) for each
// EVEX pclmul width.
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11869
11870//===----------------------------------------------------------------------===//
11871// VBMI2
11872//===----------------------------------------------------------------------===//
11873
// VBMI2 variable-count shift: three-source op with $src1 tied to $dst.
// Register (r) and full-vector load (m) forms.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11892
// Adds the broadcast-load (mb, EVEX_B) form on top of the r/m forms above.
// Only for dword/qword element sizes (word has no broadcast form).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11907
// Instantiates the r/m forms per width: 512-bit needs VBMI2, 128/256-bit
// additionally need VLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}
11920
// Same per-width instantiation, but for the broadcast-capable (rmb) forms.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// W/D/Q element-size instantiations of a variable-count shift.  The word
// form uses rm_common (no broadcast form exists for word elements); dword
// and qword use rmb_common.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
11942
// W/D/Q element-size instantiations of the immediate-count shift.
// NOTE(review): the word form goes through avx512_common_3Op_rm_imm8 while
// dword/qword use avx512_common_3Op_imm8 -- presumably because word elements
// lack a broadcast form; confirm against those multiclass definitions.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
11953
// Concat & Shift (VBMI2 funnel shifts): variable-count (V-suffixed nodes)
// and immediate-count forms share opcode pairs per direction.
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress (byte/word element compress; marked NotMemoryFoldable)
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand (byte/word element expand)
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11972
11973//===----------------------------------------------------------------------===//
11974// VNNI
11975//===----------------------------------------------------------------------===//
11976
// One VNNI dot-product instruction at a single vector width, in three
// addressing forms: register (r), full-vector memory (m), and 32-bit
// element broadcast (mb). All forms read-modify-write $dst ($src1 = $dst).
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: $src3 is a single 32-bit scalar splatted across the
  // vector (EVEX_B).
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
12004
// Instantiates a VNNI instruction at 512-bit width (HasVNNI) and, when VLX
// is also available, at 256- and 128-bit widths.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                           IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                           IsCommutable>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                           IsCommutable>, EVEX_V128;
  }
}
12017
// FIXME: Is there a better scheduler class for VPDP?
// The word-based dot products (VPDPWSSD*) are commutable in their two
// multiplicand operands; the byte-based ones (VPDPBUSD*) mix signed and
// unsigned inputs and are not.
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12023
// Single-use variant of X86vpmaddwd: only fold pmaddwd into VPDPWSSD when
// the pmaddwd result has no other users, otherwise it would have to be
// recomputed.
def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
                             (X86vpmaddwd node:$lhs, node:$rhs), [{
  return N->hasOneUse();
}]>;
12028
// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// add(x, pmaddwd(a, b)) is exactly what vpdpwssd computes, so fuse the pair
// into one instruction at each vector width (512-bit here; 256/128 below
// under the VLX predicate).
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}
12052
12053//===----------------------------------------------------------------------===//
12054// Bit Algorithms
12055//===----------------------------------------------------------------------===//
12056
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte/word population count (BITALG). Shared opcode 0x54; VEX_W selects
// the word form.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

// Lowering patterns that select the 512-bit instruction for narrower
// vectors when VLX is unavailable.
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12065
// Single-use variant of X86Vpshufbitqmb, used for the masked patterns so
// the unmasked node is not duplicated when it has other users.
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
12070
// vpshufbitqmb at one vector width: register-register and register-memory
// forms. The destination is a mask register (VTI.KRC), so the masked
// variants come from AVX512_maskable_cmp; the _su fragment supplies the
// single-use pattern used under a writemask.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}
12092
// Instantiates vpshufbitqmb at 512-bit width (BITALG) and, with VLX, at
// 256/128 bits.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12104
12105//===----------------------------------------------------------------------===//
12106// GFNI
12107//===----------------------------------------------------------------------===//
12108
// EVEX-encoded vgf2p8mulb at all three vector widths. Requires BWI in
// addition to GFNI because the instruction operates on byte elements.
// The trailing '1' marks the operation commutable.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}
12121
// GF(2^8) byte multiply, EVEX encoding (opcode 0xCF, T8PD map).
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
12125
// Affine-transform forms at one width: inherits reg/mem+imm8 forms from
// avx512_3Op_rm_imm8 and adds the 64-bit broadcast form (rmbi). The
// broadcast operand is a qword matrix (BcstVTI), bitcast back to the byte
// vector type the operation uses.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
}
12140
// Instantiates a GFNI affine instruction at 512 bits (AVX512+BWI) and,
// with VLX, at 256/128 bits. Each width pairs a byte vector type with the
// matching qword type used for the 64-bit broadcast form.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}
12153
// GF(2^8) affine transform (0xCE) and affine-inverse transform (0xCF),
// EVEX encodings with an 8-bit immediate.
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12160
12161
12162//===----------------------------------------------------------------------===//
12163// AVX5124FMAPS
12164//===----------------------------------------------------------------------===//
12165
// AVX5124FMAPS instructions are assembler/disassembler-only here: the
// patterns are empty ([]), hasSideEffects = 0 / mayLoad = 1 describe them
// to the machine-level analyses instead. All are memory-only (no register
// form exists) and read-modify-write $dst.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
12192
12193//===----------------------------------------------------------------------===//
12194// AVX5124VNNIW
12195//===----------------------------------------------------------------------===//
12196
// AVX5124VNNIW: like the 4FMAPS definitions above, these are asm-only
// (empty patterns) memory-form instructions with read-modify-write $dst.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}
12211
// Pseudos for spilling/reloading a VK16 mask-register pair as a unit;
// expanded later (presumably during pseudo expansion — the expansion site
// is not in this file).
let hasSideEffects = 0 in {
  let mayStore = 1 in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1 in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}
12218
12219//===----------------------------------------------------------------------===//
12220// VP2INTERSECT
12221//===----------------------------------------------------------------------===//
12222
// vp2intersect at one vector width: reg-reg (rr), full memory (rm), and
// broadcast (rmb) forms. The result is a pair of mask registers (KRPC),
// which is why these use plain I<> defs rather than the maskable helpers.
multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX_4V, T8XD;

  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins  _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;

  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
12251
// Instantiates vp2intersect at 512-bit width and, with VLX, at 256/128.
multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
  }
}
12261
// Dword and qword element variants; VEX_W distinguishes the qword form.
defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
12264
// Binary op whose source and destination vector types differ (e.g. f32
// sources producing i16 bf16 results), instantiated at all three widths.
// CD8 scaling is fixed at 32 bits for every width.
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}
12287
// Convert two packed-single sources to one packed-bf16 result.
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12292
12293// Truncate Float to BFloat16
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let Predicates = [HasBF16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    // 128-bit form uses null_frag: v4f32 -> v8i16 only fills half the
    // destination, so selection is done with the explicit patterns that
    // follow this multiclass instead of an instruction pattern here.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                               X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;

    // Aliases: the x/y mnemonic suffixes disambiguate the 128- vs 256-bit
    // source when both produce a 128-bit result.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                    VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                    f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                    VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                    f256mem:$src), 0, "intel">;
  }
}
12322
// Single-source packed-single to packed-bf16 conversion.
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;
12326
let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  // Three groups below: register source, full-vector load, and 32-bit
  // broadcast load — each with unmasked, merge-masked (k) and zero-masked
  // (kz) selections.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}
12358
// vdpbf16ps at one vector width: register (r), memory (m) and broadcast
// (mb) forms. '_' describes the f32 accumulator/result type; src_v is the
// integer type used to load/broadcast the bf16 source operand. All forms
// read-modify-write $dst ($src1 = $dst).
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src2, _.RC:$src3),
                           OpcodeStr, "$src3, $src2", "$src2, $src3",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
                           EVEX_4V;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                               (src_v.VT (bitconvert
                               (src_v.LdFrag addr:$src3)))))>, EVEX_4V;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
                  OpcodeStr,
                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
                  !strconcat("$src2, ${src3}", _.BroadcastStr),
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                  EVEX_B, EVEX_4V;

}
} // Constraints = "$src1 = $dst"
12386
// Instantiates vdpbf16ps at 512-bit width (prd) and, with VLX, at 256/128.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
                                   src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}
12401
// BF16 dot product accumulating into packed single precision.
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12405