xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 77a1348b3c1cfe8547be49a121b56299a1e18b69)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
// Bundles every property that can be derived from a vector type
// (NumElts x EltVT): register classes, mask classes, memory operands, load
// fragments and so on.  Multiclasses take one record of this class as a single
// template argument instead of a long list of individual arguments.
// Scalar types use the same class with numelts set to 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Mask register class for this element count ("VK" # NumElts).
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Mask register pair operand; left unset when NumElts is greater than 16.
  RegisterOperand KRPC =
      !if(!gt(NumElts, 16), ?,
          !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Write-mask register class for this element count.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // Value type of the mask: one i1 per element.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector VT as a string.  For real vectors this is
  // "v" # NumElts # EltVT, so 8 elements of i32 give v8i32.  For scalars
  // (NumElts = 1) the element count is replaced by 4 for 32-bit elements and
  // by 2 for 64-bit elements, producing e.g. v4f32 or v2f64.
  string VTName = "v" #
                  !if(!eq(NumElts, 1),
                      !if(!eq(EltVT.Size, 32), 4,
                          !if(!eq(EltVT.Size, 64), 2, NumElts)),
                      NumElts) #
                  EltVT;

  // The vector VT named by VTName.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Element size in bits as a string (the element type name with "i" / "f"
  // removed), e.g. "32" for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // Element type name with the size removed: "i" for integer types and "f"
  // for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of VT in bits, e.g. 512 for the types placed in VR512.
  int Size = VT.Size;

  // Memory operand for the full vector, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics (ssmem / sdmem); unset for
  // element types other than f32 and f64.
  Operand IntScalarMemOp =
      !if(!eq(EltTypeName, "f32"), !cast<Operand>("ssmem"),
          !if(!eq(EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load pattern fragments.
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  // Scalar-intrinsic memory complex pattern; only set for f32 and f64.
  ComplexPattern ScalarIntMemCPat =
      !if(!eq(EltTypeName, "f32"), !cast<ComplexPattern>("sse_load_f32"),
          !if(!eq(EltTypeName, "f64"), !cast<ComplexPattern>("sse_load_f64"),
              ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  Only set when
  // NumElts >> 4 is zero (i.e. NumElts below 16); unset otherwise.
  CD8VForm CD8TupleForm =
      !if(!eq(!srl(NumElts, 4), 0), !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index for 128-bit and 256-bit types; unset for other sizes.
  SubRegIndex SubRegIdx = !if(!eq(Size, 128), sub_xmm,
                              !if(!eq(Size, 256), sub_ymm, ?));

  // Execution domain chosen from the element type; everything that is not
  // f32 or f64 is SSEPackedInt.
  Domain ExeDomain = !if(!eq(EltTypeName, "f32"), SSEPackedSingle,
                         !if(!eq(EltTypeName, "f64"), SSEPackedDouble,
                             SSEPackedInt));

  // FR32X for f32 elements, FR64X for every other element type.
  RegisterClass FRC = !if(!eq(EltTypeName, "f32"), FR32X, FR64X);

  // All-zeros vector of type VT, as a dag.
  dag ImmAllZerosV = (VT immAllZerosV);

  // Size-dependent suffix: "Z128", "Z256", or "Z" for any other size.
  string ZSuffix = !if(!eq(Size, 128), "Z128",
                       !if(!eq(Size, 256), "Z256", "Z"));
}
107
// Vector types held in VR512.
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;

// Vector types held in VR256X.  The "x" in names such as v32i8x_info marks
// the use of the X register class (here RC = VR256X).
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;

// Vector types held in VR128X.
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;

// Scalar types (NumElts = 1).  They are mapped to the smallest (128-bit)
// vector type with the matching element type, which lets them share the
// masking logic used for vectors.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
136
// Groups three X86VectorVTInfo records for one element type, exposed as
// info512, info256 and info128, so a multiclass can take all of them as a
// single template argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
143
// One AVX512VLVectorVTInfo per element type, each listing the VR512, VR256X
// and VR128X flavours in that order.
def avx512vl_i8_info
    : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, v16i8x_info>;
def avx512vl_i16_info
    : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info
    : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, v4i32x_info>;
def avx512vl_i64_info
    : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, v2i64x_info>;
def avx512vl_f32_info
    : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, v4f32x_info>;
def avx512vl_f64_info
    : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, v2f64x_info>;
156
// Describes a mask (vXi1) type: its mask register class, the corresponding
// write-mask register class, and the mask value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}
163
// Mask type descriptions for every mask width from 1 to 64 bits.
def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
171
// Emits three instruction records from one description: the unmasked form
// (NAME), the merge-masked form (NAME#k) and the zero-masked form (NAME#kz).
// Only the assembly strings for the masked forms are built here; the ISel
// patterns for all three forms are supplied by the caller as template
// arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  // Unmasked form.
  let isCommutable = IsCommutable in {
    def NAME : AVX512<O, F, Outs, Ins,
                      OpcodeStr # "\t{" # AttSrcAsm # ", $dst|" #
                          "$dst, " # IntelSrcAsm # "}",
                      Pattern>;
  }

  // Merge-masked form.
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in {
    def NAME#k : AVX512<O, F, Outs, MaskingIns,
                        OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}}|" #
                            "$dst {${mask}}, " # IntelSrcAsm # "}",
                        MaskingPattern>,
                 EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }
  }

  // Zero-masked form.  Zero masking places no extra restriction on commuting
  // the operands, which is why IsKZCommutable defaults to IsCommutable rather
  // than IsKCommutable.
  // Prefer over VMOV*rrkz Pat<>
  let isCommutable = IsKZCommutable in {
    def NAME#kz : AVX512<O, F, Outs, ZeroMaskingIns,
                         OpcodeStr # "\t{" # AttSrcAsm #
                             ", $dst {${mask}} {z}|" #
                             "$dst {${mask}} {z}, " # IntelSrcAsm # "}",
                         ZeroMaskingPattern>,
                  EVEX_KZ;
  }
}
214
215
// Shared base of AVX512_maskable and AVX512_maskable_3src.  Wraps the RHS
// dags into the three pattern lists expected by AVX512_maskable_custom.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable>
    : AVX512_maskable_custom<
          O, F, Outs, Ins, MaskingIns, ZeroMaskingIns,
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          // Unmasked pattern.
          [(set _.RC:$dst, RHS)],
          // Merge-masked pattern; the caller passes the complete dag.
          [(set _.RC:$dst, MaskingRHS)],
          // Zero-masked pattern: Select between RHS and an all-zeros vector.
          [(set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
          MaskingConstraint, IsCommutable, IsKCommutable, IsKZCommutable>;
236
// Generates the unconditional (non-masking), the masking and the zero-masking
// variant of a vector instruction.  For the masking variant the preserved
// vector elements come from a new dummy input operand, $src0, tied to $dst.
// Unlike AVX512_maskable, this version takes one dag for the non-masking
// pattern (RHS) and a separate one for the masked patterns (MaskRHS).
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect>
    : AVX512_maskable_custom<
          O, F, Outs, Ins,
          // Masking inputs: pass-through value and mask, then the originals.
          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
          // Zero-masking inputs: mask, then the originals.
          !con((ins _.KRCWM:$mask), Ins),
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          [(set _.RC:$dst, RHS)],
          [(set _.RC:$dst, (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
          [(set _.RC:$dst, (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
          "$src0 = $dst", IsCommutable, IsKCommutable>;
257
// Generates the unconditional (non-masking), the masking and the zero-masking
// variant of a vector instruction from a single RHS dag.  For the masking
// variant the preserved vector elements come from a new dummy input operand,
// $src0, tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect>
    : AVX512_maskable_common<
          O, F, _, Outs, Ins,
          // Masking inputs: pass-through value and mask, then the originals.
          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
          // Zero-masking inputs: mask, then the originals.
          !con((ins _.KRCWM:$mask), Ins),
          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
          // Merge-masked RHS: unselected elements are taken from $src0.
          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
          IsKZCommutable>;
275
// Scalar counterpart of AVX512_maskable: generates the unconditional, the
// masking and the zero-masking variant of a scalar instruction.  All three
// commutable flags are passed as 0 and X86selects is used as the select node.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS>
    : AVX512_maskable<O, F, _, Outs, Ins,
                      OpcodeStr, AttSrcAsm, IntelSrcAsm,
                      RHS, 0, 0, 0, X86selects>;
284
// Like AVX512_maskable, except that one of the source operands ($src1) is
// already tied to $dst, so it doubles as the source of the preserved vector
// elements and no extra $src0 operand is added.
// NOTE: NonTiedIns (the ins dag) must not contain $src1; it is prepended here.
// When MaskOnly is set, the unmasked pattern is replaced by null_frag.
// An empty MaskingConstraint is passed down from here.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0>
    : AVX512_maskable_common<
          O, F, _, Outs,
          !con((ins _.RC:$src1), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          !if(MaskOnly, (null_frag), RHS),
          // Merge-masked RHS: unselected elements are taken from $src1.
          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
          Select, "", IsCommutable, IsKCommutable>;
305
// Variant of AVX512_maskable_3src for instructions whose tied input operand
// has a different VT (InVT) than the result (OutVT).  The preserved vector is
// therefore passed through a bitconvert before it enters the vselect.
// NOTE: The unmasked pattern is disabled (null_frag).
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0>
    : AVX512_maskable_common<
          O, F, OutVT, Outs,
          !con((ins InVT.RC:$src1), NonTiedIns),
          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
          (vselect InVT.KRCWM:$mask, RHS, (bitconvert InVT.RC:$src1)),
          vselect, "", IsCommutable>;
323
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and selects with X86selects instead of the default vselect.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0>
    : AVX512_maskable_3src<O, F, _, Outs, NonTiedIns,
                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
                           RHS, IsCommutable, IsKCommutable,
                           X86selects, MaskOnly>;
334
// Builds the unmasked, masked and zero-masked records with the same operand
// layout as AVX512_maskable ($src0 tied to $dst for the masked form), but
// gives the two masked forms empty pattern lists.  Only the unmasked form
// receives the caller's Pattern.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern>
    : AVX512_maskable_custom<
          O, F, Outs, Ins,
          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
          !con((ins _.KRCWM:$mask), Ins),
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          Pattern, [], [],
          "$src0 = $dst">;
345
// Builds the unmasked, masked and zero-masked records with the same operand
// layout as AVX512_maskable_3src ($src1 prepended to NonTiedIns), but gives
// the two masked forms empty pattern lists.  Only the unmasked form receives
// the caller's Pattern.  An empty MaskingConstraint is passed down.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern>
    : AVX512_maskable_custom<
          O, F, Outs,
          !con((ins _.RC:$src1), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          Pattern, [], [],
          "">;
357
// Masked instructions that write their result to a mask register, such as
// "compare" and "vptest".  Emits the unmasked form (NAME) and the masked form
// (NAME#k); there is no zero-masked form.  Both forms get the same
// isCommutable setting.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
  // Unmasked form.
  let isCommutable = IsCommutable in
    def NAME : AVX512<O, F, Outs, Ins,
                      OpcodeStr # "\t{" # AttSrcAsm # ", $dst|" #
                          "$dst, " # IntelSrcAsm # "}",
                      Pattern>;

  // Masked form.
  let isCommutable = IsCommutable in
    def NAME#k : AVX512<O, F, Outs, MaskingIns,
                        OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}}|" #
                            "$dst {${mask}}, " # IntelSrcAsm # "}",
                        MaskingPattern>,
                 EVEX_K;
}
380
// Wraps the RHS dags into pattern lists that set the mask register class
// (_.KRC) destination and forwards them to AVX512_maskable_custom_cmp.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0>
    : AVX512_maskable_custom_cmp<
          O, F, Outs, Ins, MaskingIns,
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          [(set _.KRC:$dst, RHS)],
          [(set _.KRC:$dst, MaskingRHS)],
          IsCommutable>;
392
// Mask-producing instruction with an unmasked and a masked form.  The masked
// form takes the write mask as an extra leading input and matches the AND of
// that mask with RHS_su; the unmasked form matches RHS.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0>
    : AVX512_maskable_common_cmp<
          O, F, _, Outs, Ins,
          !con((ins _.KRCWM:$mask), Ins),
          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
          (and _.KRCWM:$mask, RHS_su),
          IsCommutable>;
401
402
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// ExpandPostRAPseudos expands it to an xorps / vxorps, which
// ExecutionDomainFix then swizzles to pxor.
// canFoldAsLoad is set because the pseudo can be converted to a
// constant-pool load of an all-zeros value if folding it would be beneficial.
// The all-ones pseudo is declared alongside it with the same properties.
let Predicates = [HasAVX512], SchedRW = [WriteZero], isPseudo = 1,
    isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  def AVX512_512_SET0
      : I<0, Pseudo, (outs VR512:$dst), (ins), "",
          [(set VR512:$dst, (v16i32 immAllZerosV))]>;
  def AVX512_512_SETALLONES
      : I<0, Pseudo, (outs VR512:$dst), (ins), "",
          [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
415
// Select AVX512_512_SET0 for an all-zeros vector of each listed 512-bit type.
// v16i32 is not in the list because the pseudo's own pattern matches it.
let Predicates = [HasAVX512] in {
  foreach Zero512VT = [v64i8, v32i16, v8i64, v16f32, v8f64] in
    def : Pat<(Zero512VT immAllZerosV), (AVX512_512_SET0)>;
}
423
// Alias instructions that let VPTERNLOG be used with a mask to produce a mix
// of all-ones and all-zeros elements.  Modelling this as a pseudo forces the
// same register to be used as input for all three sources.
let Predicates = [HasAVX512], SchedRW = [WriteVecALU], isPseudo = 1 in {
  // 16 x i32: all-ones where the mask bit is set, zero elsewhere.
  def AVX512_512_SEXT_MASK_32
      : I<0, Pseudo, (outs VR512:$dst), (ins VK16WM:$mask), "",
          [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                     (v16i32 immAllOnesV),
                                     (v16i32 immAllZerosV)))]>;
  // 8 x i64: all-ones where the mask bit is set, zero elsewhere.
  def AVX512_512_SEXT_MASK_64
      : I<0, Pseudo, (outs VR512:$dst), (ins VK8WM:$mask), "",
          [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                     (v8i64 immAllOnesV),
                                     (v8i64 immAllZerosV)))]>;
}
439
// Zero-vector pseudos for the VR128X and VR256X register classes, declared
// with the same properties as AVX512_512_SET0.
let Predicates = [HasAVX512], SchedRW = [WriteZero], isPseudo = 1,
    isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  def AVX512_128_SET0
      : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
          [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
  def AVX512_256_SET0
      : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
          [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
447
// Select the 128-bit / 256-bit zero pseudos for an all-zeros vector of each
// listed type.  v4i32 and v8i32 are not in the lists because the pseudos' own
// patterns match them.
let Predicates = [HasAVX512] in {
  foreach Zero128VT = [v8i16, v16i8, v2i64, v4f32, v2f64] in
    def : Pat<(Zero128VT immAllZerosV), (AVX512_128_SET0)>;

  foreach Zero256VT = [v32i8, v16i16, v4i64, v8f32, v4f64] in
    def : Pat<(Zero256VT immAllZerosV), (AVX512_256_SET0)>;
}
460
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// These pseudos are expanded by ExpandPostRAPseudos.
let Predicates = [HasAVX512], SchedRW = [WriteZero], isPseudo = 1,
    isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  // f32 zero in FR32X.
  def AVX512_FsFLD0SS
      : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
          [(set FR32X:$dst, fp32imm0)]>;
  // f64 zero in FR64X.
  def AVX512_FsFLD0SD
      : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
          [(set FR64X:$dst, fp64imm0)]>;
  // f128 zero in VR128X.
  def AVX512_FsFLD0F128
      : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
          [(set VR128X:$dst, fp128imm0)]>;
}
472
473//===----------------------------------------------------------------------===//
474// AVX-512 - VECTOR INSERT
475//
476
// Defines the register (rr) and memory (rm) forms of a subvector insert,
// each with unmasked, masked and zero-masked variants.  Separate pattern
// operators are taken for the unmasked (vinsert_insert) and the masked
// (vinsert_for_mask) patterns, so null_frag can be passed for one of them.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let ExeDomain = To.ExeDomain, hasSideEffects = 0 in {
    // Register form: the inserted subvector comes from a From.RC register.
    defm rr : AVX512_maskable_split<
                  Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                  "vinsert" # From.EltTypeName # "x" # From.NumElts,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                        (From.VT From.RC:$src2),
                                        (iPTR imm)),
                  (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                          (From.VT From.RC:$src2),
                                          (iPTR imm))>,
              AVX512AIi8Base, EVEX_4V, Sched<[sched]>;

    // Memory form: the inserted subvector is loaded through From.LdFrag.
    let mayLoad = 1 in {
      defm rm : AVX512_maskable_split<
                    Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                    (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
                    "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                          (From.VT (From.LdFrag addr:$src2)),
                                          (iPTR imm)),
                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))>,
                AVX512AIi8Base, EVEX_4V,
                EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}
511
// Convenience wrapper around vinsert_for_size_split that uses one pattern
// operator for both the unmasked and the masked patterns.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched>
    : vinsert_for_size_split<Opcode, From, To,
                             vinsert_insert, vinsert_insert,
                             sched>;
518
// Adds selection patterns that map a subvector insert on the From / To types
// onto the already-defined instructions named InstrStr#"rr" (register
// source) and InstrStr#"rm" (loaded source).  The instruction immediate is
// produced by applying INSERT_get_vinsert_imm to the matched insert node.
// The patterns are guarded by the predicate list p.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Register source.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT From.RC:$src2),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                         To.RC:$src1, From.RC:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Loaded source.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT (From.LdFrag addr:$src2)),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rm")
                         To.RC:$src1, addr:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
538
// Instantiates all subvector-insert instructions for one type family (a
// 32-bit and a 64-bit element type).  Opcode128 is used for the forms that
// insert a 128-bit subvector, Opcode256 for those that insert a 256-bit one.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 32x4 into a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
        : vinsert_for_size<Opcode128,
                           X86VectorVTInfo< 4, EltVT32, VR128X>,
                           X86VectorVTInfo< 8, EltVT32, VR256X>,
                           vinsert128_insert, sched>,
          EVEX_V256;
  }

  // 32x4 into a 512-bit vector.
  defm NAME # "32x4Z"
      : vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                         X86VectorVTInfo<16, EltVT32, VR512>,
                         vinsert128_insert, sched>,
        EVEX_V512;

  // 64x4 into a 512-bit vector.
  defm NAME # "64x4Z"
      : vinsert_for_size<Opcode256,
                         X86VectorVTInfo< 4, EltVT64, VR256X>,
                         X86VectorVTInfo< 8, EltVT64, VR512>,
                         vinsert256_insert, sched>,
        VEX_W, EVEX_V512;

  // 64x2 into a 256-bit vector.
  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
          VEX_W1X, EVEX_V256;
  }

  // 64x2 and 32x8 into a 512-bit vector.
  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
          VEX_W, EVEX_V512;

    defm NAME # "32x8Z"
        : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
          EVEX_V512;
  }
}
582
// Floating-point (VINSERTF*) and integer (VINSERTI*) insert instructions.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF
    : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI
    : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
586
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit element types, 128-bit into 256-bit, via the 32x4 instructions.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 64-bit element types, 128-bit into 512-bit, via the 32x4 instructions.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 32-bit element types, 256-bit into 512-bit, via the 64x4 instructions.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
619
620
// Additional patterns for handling a bitcast between the vselect and the
// subvector insert. Each pattern matches
//   vselect(mask, bitconvert(vinsert_insert(src1, src2, imm)), passthru)
// and selects the masked form of the instruction named by InstrStr.
//   InstrStr               - instruction name; "rrk"/"rmk"/"rrkz"/"rmkz" is
//                            appended to pick the masked variant.
//   From                   - type info of the inserted subvector ($src2).
//   To                     - type info of the vector being inserted into
//                            ($src1).
//   Cast                   - type info of the vselect: supplies the result
//                            type, the write-mask class, the passthrough
//                            register class and the all-zeros vector.
//   vinsert_insert         - fragment matching the subvector insert.
//   INSERT_get_vinsert_imm - transform applied to the matched insert ($ins)
//                            to produce the instruction's immediate operand.
//   p                      - predicates guarding all four patterns.
621multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
622                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
623                                 PatFrag vinsert_insert,
624                                 SDNodeXForm INSERT_get_vinsert_imm,
625                                 list<Predicate> p> {
626let Predicates = p in {
  // Merge-masking, register source: passthrough is the register $src0.
627  def : Pat<(Cast.VT
628             (vselect Cast.KRCWM:$mask,
629                      (bitconvert
630                       (vinsert_insert:$ins (To.VT To.RC:$src1),
631                                            (From.VT From.RC:$src2),
632                                            (iPTR imm))),
633                      Cast.RC:$src0)),
634            (!cast<Instruction>(InstrStr#"rrk")
635             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
636             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, memory source.
  // NOTE(review): this pattern wraps the load in a bitconvert while the
  // zero-masking memory pattern below does not - confirm whether the
  // bitconvert is still required for From.LdFrag.
637  def : Pat<(Cast.VT
638             (vselect Cast.KRCWM:$mask,
639                      (bitconvert
640                       (vinsert_insert:$ins (To.VT To.RC:$src1),
641                                            (From.VT
642                                             (bitconvert
643                                              (From.LdFrag addr:$src2))),
644                                            (iPTR imm))),
645                      Cast.RC:$src0)),
646            (!cast<Instruction>(InstrStr#"rmk")
647             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
648             (INSERT_get_vinsert_imm To.RC:$ins))>;
649
  // Zero-masking, register source: passthrough is the all-zeros vector.
650  def : Pat<(Cast.VT
651             (vselect Cast.KRCWM:$mask,
652                      (bitconvert
653                       (vinsert_insert:$ins (To.VT To.RC:$src1),
654                                            (From.VT From.RC:$src2),
655                                            (iPTR imm))),
656                      Cast.ImmAllZerosV)),
657            (!cast<Instruction>(InstrStr#"rrkz")
658             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
659             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, memory source.
660  def : Pat<(Cast.VT
661             (vselect Cast.KRCWM:$mask,
662                      (bitconvert
663                       (vinsert_insert:$ins (To.VT To.RC:$src1),
664                                            (From.VT (From.LdFrag addr:$src2)),
665                                            (iPTR imm))),
666                      Cast.ImmAllZerosV)),
667            (!cast<Instruction>(InstrStr#"rmkz")
668             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
669             (INSERT_get_vinsert_imm To.RC:$ins))>;
670}
671}
672
// Masked 128-bit-into-256-bit inserts where the vselect operates on a
// different element width than the insert. The instruction is chosen by the
// mask (Cast) element width: a 32-bit-element mask selects the 32x4 form, a
// 64-bit-element mask selects the 64x2 form (which additionally needs DQI).
673defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
674                             v8f32x_info, vinsert128_insert,
675                             INSERT_get_vinsert128_imm, [HasVLX]>;
676defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
677                             v4f64x_info, vinsert128_insert,
678                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
679
680defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
681                             v8i32x_info, vinsert128_insert,
682                             INSERT_get_vinsert128_imm, [HasVLX]>;
683defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
684                             v8i32x_info, vinsert128_insert,
685                             INSERT_get_vinsert128_imm, [HasVLX]>;
686defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
687                             v8i32x_info, vinsert128_insert,
688                             INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer vectors with a 64-bit-element mask use the integer 64x2 form, in
// line with the 512-bit VINSERTI64x2Z instantiations and the
// VEXTRACTI64x2Z256 extract patterns.
689defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
690                             v4i64x_info, vinsert128_insert,
691                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
692defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
693                             v4i64x_info, vinsert128_insert,
694                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
695defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
696                             v4i64x_info, vinsert128_insert,
697                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
698
// Masked 128-bit-into-512-bit inserts with a bitcast between the vselect and
// the insert. The 32x4 forms need only AVX512; the 64x2 forms need DQI.
699defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
700                             v16f32_info, vinsert128_insert,
701                             INSERT_get_vinsert128_imm, [HasAVX512]>;
702defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
703                             v8f64_info, vinsert128_insert,
704                             INSERT_get_vinsert128_imm, [HasDQI]>;
705
706defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
707                             v16i32_info, vinsert128_insert,
708                             INSERT_get_vinsert128_imm, [HasAVX512]>;
709defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
710                             v16i32_info, vinsert128_insert,
711                             INSERT_get_vinsert128_imm, [HasAVX512]>;
712defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
713                             v16i32_info, vinsert128_insert,
714                             INSERT_get_vinsert128_imm, [HasAVX512]>;
715defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
716                             v8i64_info, vinsert128_insert,
717                             INSERT_get_vinsert128_imm, [HasDQI]>;
718defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
719                             v8i64_info, vinsert128_insert,
720                             INSERT_get_vinsert128_imm, [HasDQI]>;
721defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
722                             v8i64_info, vinsert128_insert,
723                             INSERT_get_vinsert128_imm, [HasDQI]>;
724
// Masked 256-bit-into-512-bit inserts. The 64x4 forms need only AVX512; the
// 32x8 forms need DQI.
725defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
726                             v16f32_info, vinsert256_insert,
727                             INSERT_get_vinsert256_imm, [HasDQI]>;
728defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
729                             v8f64_info, vinsert256_insert,
730                             INSERT_get_vinsert256_imm, [HasAVX512]>;
731
732defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
733                             v16i32_info, vinsert256_insert,
734                             INSERT_get_vinsert256_imm, [HasDQI]>;
735defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
736                             v16i32_info, vinsert256_insert,
737                             INSERT_get_vinsert256_imm, [HasDQI]>;
738defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
739                             v16i32_info, vinsert256_insert,
740                             INSERT_get_vinsert256_imm, [HasDQI]>;
741defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
742                             v8i64_info, vinsert256_insert,
743                             INSERT_get_vinsert256_imm, [HasAVX512]>;
744defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
745                             v8i64_info, vinsert256_insert,
746                             INSERT_get_vinsert256_imm, [HasAVX512]>;
747defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
748                             v8i64_info, vinsert256_insert,
749                             INSERT_get_vinsert256_imm, [HasAVX512]>;
750
751// vinsertps - insert f32 to XMM
// Both forms operate on VR128X registers, take an 8-bit immediate and select
// the X86insertps node. Only the register form is marked commutable.
752let ExeDomain = SSEPackedSingle in {
753let isCommutable = 1 in
754def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
755      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
756      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
757      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
758      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the second source is a scalar f32 load that the pattern wraps
// in scalar_to_vector to form a v4f32.
759def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
760      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
761      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
762      [(set VR128X:$dst, (X86insertps VR128X:$src1,
763                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
764                          timm:$src3))]>,
765      EVEX_4V, EVEX_CD8<32, CD8VT1>,
766      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
767}
768
769//===----------------------------------------------------------------------===//
770// AVX-512 VECTOR EXTRACT
771//---
772
773// Supports two different pattern operators for mask and unmasked ops. Allows
774// null_frag to be passed for one.
//   Opcode            - opcode shared by the rr, mr and mrk forms.
//   From              - type info of the source vector ($src1).
//   To                - type info of the extracted subvector; also supplies
//                       the mnemonic suffix (EltTypeName "x" NumElts), the
//                       memory operand and the write-mask class.
//   vextract_extract  - operator used in the first pattern handed to
//                       AVX512_maskable_split and in the store pattern of mr.
//   vextract_for_mask - operator used in the second pattern handed to
//                       AVX512_maskable_split.
//   SchedRR / SchedMR - scheduling classes for the register and store forms.
775multiclass vextract_for_size_split<int Opcode,
776                                   X86VectorVTInfo From, X86VectorVTInfo To,
777                                   SDPatternOperator vextract_extract,
778                                   SDPatternOperator vextract_for_mask,
779                                   SchedWrite SchedRR, SchedWrite SchedMR> {
780
781  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination; masked variants come from AVX512_maskable_split.
782    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
783                (ins From.RC:$src1, u8imm:$idx),
784                "vextract" # To.EltTypeName # "x" # To.NumElts,
785                "$idx, $src1", "$src1, $idx",
786                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
787                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
788                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
789
    // Unmasked store of the extracted subvector.
790    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
791                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
792                    "vextract" # To.EltTypeName # "x" # To.NumElts #
793                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
794                    [(store (To.VT (vextract_extract:$idx
795                                    (From.VT From.RC:$src1), (iPTR imm))),
796                             addr:$dst)]>, EVEX,
797                    Sched<[SchedMR]>;
798
    // Masked store. It has no selection pattern, so mayStore is set
    // explicitly, and it is marked NotMemoryFoldable.
799    let mayStore = 1, hasSideEffects = 0 in
800    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
801                    (ins To.MemOp:$dst, To.KRCWM:$mask,
802                                        From.RC:$src1, u8imm:$idx),
803                     "vextract" # To.EltTypeName # "x" # To.NumElts #
804                          "\t{$idx, $src1, $dst {${mask}}|"
805                          "$dst {${mask}}, $src1, $idx}", []>,
806                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
807  }
808}
809
810// Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vextract_for_size_split that forwards vextract_extract
// as both the unmasked and the masked operator.
811multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
812                             X86VectorVTInfo To,
813                             SDPatternOperator vextract_extract,
814                             SchedWrite SchedRR, SchedWrite SchedMR> :
815  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
816
817// Codegen pattern for the alternative types
// Maps a subvector extract of From.VT onto the existing instruction named by
// InstrStr: the "rr" form for a register result and the "mr" form when the
// extracted value is stored straight to memory. EXTRACT_get_vextract_imm is
// applied to the matched extract ($ext) to produce the immediate operand;
// p lists the predicates guarding both patterns.
818multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
819                X86VectorVTInfo To, PatFrag vextract_extract,
820                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
821  let Predicates = p in {
822     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
823               (To.VT (!cast<Instruction>(InstrStr#"rr")
824                          From.RC:$src1,
825                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
826     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
827                              (iPTR imm))), addr:$dst),
828               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
829                (EXTRACT_get_vextract_imm To.RC:$ext))>;
830  }
831}
832
// Defines the whole subvector-extract family for one pair of element types.
// EltVT32/EltVT64 are the 32- and 64-bit element types; Opcode128/Opcode256
// are the opcodes of the 128-bit and 256-bit extracts; SchedRR/SchedMR are
// the scheduling classes forwarded to every instantiation.
833multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
834                             ValueType EltVT64, int Opcode256,
835                             SchedWrite SchedRR, SchedWrite SchedMR> {
  // 512-bit sources: 32x4 and 64x4, each with patterns for both the masked
  // and unmasked forms.
836  let Predicates = [HasAVX512] in {
837    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
838                                   X86VectorVTInfo<16, EltVT32, VR512>,
839                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
840                                   vextract128_extract, SchedRR, SchedMR>,
841                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
842    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
843                                   X86VectorVTInfo< 8, EltVT64, VR512>,
844                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
845                                   vextract256_extract, SchedRR, SchedMR>,
846                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
847  }
  // 256-bit source, 32x4 form.
848  let Predicates = [HasVLX] in
849    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
850                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
851                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
852                                 vextract128_extract, SchedRR, SchedMR>,
853                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;
854
855  // Even with DQI we'd like to only use these instructions for masking.
  // null_frag is passed as the unmasked operator, so only the masked pattern
  // is populated.
856  let Predicates = [HasVLX, HasDQI] in
857    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
858                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
859                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
860                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
861                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
862
863  // Even with DQI we'd like to only use these instructions for masking.
864  let Predicates = [HasDQI] in {
865    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
866                                 X86VectorVTInfo< 8, EltVT64, VR512>,
867                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
868                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
869                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
870    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
871                                 X86VectorVTInfo<16, EltVT32, VR512>,
872                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
873                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
874                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
875  }
876}
877
878// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// VEXTRACTF instantiates the subvector-extract family for f32/f64 and
// VEXTRACTI for i32/i64; the last two arguments are the register-form and
// store-form scheduling classes.
879defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
880defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
881
882// extract_subvector codegen patterns with the alternative types.
883// Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit-element 128-bit results reuse the 32x4 forms and 32-bit-element
// 256-bit results reuse the 64x4 forms.
884defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
885          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
886defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
887          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
888
889defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
890          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
891defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
892          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
893
894defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
895          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
896defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
897          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
898
899// Codegen patterns for the alternative types: extract VEC128 from VEC256.
900defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
901          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
902defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
903          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
904
905// Codegen patterns for the alternative types: extract VEC128 from VEC512.
906defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
907                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
908defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
909                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
910// Codegen patterns for the alternative types: extract VEC256 from VEC512.
911defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
912                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
913defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
914                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
915
916
917// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
918// smaller extract to enable EVEX->VEX.
// Without VLX: take the low 256 bits via the sub_ymm subregister and select
// VEXTRACT{F,I}128rr with index 1 on it. The matched extract_subvector index
// is the element index of bit 128 for each element type (2, 4, 8 or 16).
919let Predicates = [NoVLX] in {
920def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
921          (v2i64 (VEXTRACTI128rr
922                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
923                  (iPTR 1)))>;
924def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
925          (v2f64 (VEXTRACTF128rr
926                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
927                  (iPTR 1)))>;
928def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
929          (v4i32 (VEXTRACTI128rr
930                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
931                  (iPTR 1)))>;
932def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
933          (v4f32 (VEXTRACTF128rr
934                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
935                  (iPTR 1)))>;
936def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
937          (v8i16 (VEXTRACTI128rr
938                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
939                  (iPTR 1)))>;
940def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
941          (v16i8 (VEXTRACTI128rr
942                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
943                  (iPTR 1)))>;
944}
945
946// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
947// smaller extract to enable EVEX->VEX.
// With VLX: same shape as the NoVLX patterns, but select the 256-bit
// VEXTRACT{F,I}32x4Z256rr form on the sub_ymm subregister.
948let Predicates = [HasVLX] in {
949def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
950          (v2i64 (VEXTRACTI32x4Z256rr
951                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
952                  (iPTR 1)))>;
953def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
954          (v2f64 (VEXTRACTF32x4Z256rr
955                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
956                  (iPTR 1)))>;
957def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
958          (v4i32 (VEXTRACTI32x4Z256rr
959                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
960                  (iPTR 1)))>;
961def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
962          (v4f32 (VEXTRACTF32x4Z256rr
963                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
964                  (iPTR 1)))>;
965def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
966          (v8i16 (VEXTRACTI32x4Z256rr
967                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
968                  (iPTR 1)))>;
969def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
970          (v16i8 (VEXTRACTI32x4Z256rr
971                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
972                  (iPTR 1)))>;
973}
974
975
976// Additional patterns for handling a bitcast between the vselect and the
977// extract_subvector.
//   InstrStr                 - instruction name; "rrk"/"rrkz" is appended to
//                              pick the merge- or zero-masked register form.
//   From                     - type info of the source vector ($src).
//   To                       - type info of the extracted subvector.
//   Cast                     - type info of the vselect: supplies the result
//                              type, the write-mask class, the passthrough
//                              register class and the all-zeros vector.
//   vextract_extract         - fragment matching the subvector extract.
//   EXTRACT_get_vextract_imm - transform applied to the matched extract
//                              ($ext) to produce the immediate operand.
//   p                        - predicates guarding both patterns.
978multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
979                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
980                                  PatFrag vextract_extract,
981                                  SDNodeXForm EXTRACT_get_vextract_imm,
982                                  list<Predicate> p> {
983let Predicates = p in {
  // Merge-masking. The vselect, and therefore its passthrough operand, has
  // type Cast.VT, so $src0 is spelled with Cast.RC in both the source and the
  // output dag (matching vinsert_for_mask_cast).
984  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
985                              (bitconvert
986                               (To.VT (vextract_extract:$ext
987                                       (From.VT From.RC:$src), (iPTR imm)))),
988                              Cast.RC:$src0)),
989            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
990                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
991                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
992
  // Zero-masking: passthrough is the all-zeros vector.
993  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
994                              (bitconvert
995                               (To.VT (vextract_extract:$ext
996                                       (From.VT From.RC:$src), (iPTR imm)))),
997                              Cast.ImmAllZerosV)),
998            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
999                      Cast.KRCWM:$mask, From.RC:$src,
1000                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1001}
1002}
1003
// Masked 128-bit-from-256-bit extracts where the vselect operates on a
// different element width than the extract. The instruction is chosen by the
// mask (Cast) element width; the 64x2 forms additionally need DQI.
1004defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1005                              v4f32x_info, vextract128_extract,
1006                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1007defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1008                              v2f64x_info, vextract128_extract,
1009                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1010
1011defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1012                              v4i32x_info, vextract128_extract,
1013                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1014defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1015                              v4i32x_info, vextract128_extract,
1016                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1017defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1018                              v4i32x_info, vextract128_extract,
1019                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1020defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1021                              v2i64x_info, vextract128_extract,
1022                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1023defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1024                              v2i64x_info, vextract128_extract,
1025                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1026defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1027                              v2i64x_info, vextract128_extract,
1028                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1029
// Masked 128-bit-from-512-bit extracts.
1030defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1031                              v4f32x_info, vextract128_extract,
1032                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1033defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1034                              v2f64x_info, vextract128_extract,
1035                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1036
1037defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1038                              v4i32x_info, vextract128_extract,
1039                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1040defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1041                              v4i32x_info, vextract128_extract,
1042                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1043defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1044                              v4i32x_info, vextract128_extract,
1045                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1046defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1047                              v2i64x_info, vextract128_extract,
1048                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1049defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1050                              v2i64x_info, vextract128_extract,
1051                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1052defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1053                              v2i64x_info, vextract128_extract,
1054                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1055
// Masked 256-bit-from-512-bit extracts. The 64x4 forms need only AVX512; the
// 32x8 forms need DQI.
1056defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1057                              v8f32x_info, vextract256_extract,
1058                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1059defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1060                              v4f64x_info, vextract256_extract,
1061                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1062
1063defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1064                              v8i32x_info, vextract256_extract,
1065                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1066defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1067                              v8i32x_info, vextract256_extract,
1068                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1069defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1070                              v8i32x_info, vextract256_extract,
1071                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1072defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1073                              v4i64x_info, vextract256_extract,
1074                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1075defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1076                              v4i64x_info, vextract256_extract,
1077                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1078defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1079                              v4i64x_info, vextract256_extract,
1080                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1081
1082// vextractps - extract 32 bits from XMM
// Register form: the v4f32 source is viewed as v4i32 (bc_v4i32) and the
// element selected by the immediate is written to a 32-bit GPR.
1083def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1084      (ins VR128X:$src1, u8imm:$src2),
1085      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1086      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1087      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1088
// Memory form: the same extract, stored directly to a 32-bit memory operand.
1089def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1090      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1091      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1092      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1093                          addr:$dst)]>,
1094      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1095
1096//===---------------------------------------------------------------------===//
1097// AVX-512 BROADCAST
1098//---
1099// broadcast with a scalar argument.
// Emits three patterns that select X86VBroadcast of a scalar held in
// SrcInfo.FRC: unmasked (r), merge-masked (rk) and zero-masked (rkz). The
// instruction is looked up as Name # DestInfo.ZSuffix # suffix, and the
// scalar is first copied into SrcInfo.RC with COPY_TO_REGCLASS.
// The opc and OpcodeStr parameters are not referenced in the body.
1100multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1101                            string Name,
1102                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1103  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1104            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1105             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1106  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1107                                  (X86VBroadcast SrcInfo.FRC:$src),
1108                                  DestInfo.RC:$src0)),
1109            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1110             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1111             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1112  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1113                                  (X86VBroadcast SrcInfo.FRC:$src),
1114                                  DestInfo.ImmAllZerosV)),
1115            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1116             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1117}
1118
// Scalar-element broadcast instruction forms, split so that the type used for
// masking (MaskInfo) may differ from the type of the broadcast node
// (DestInfo). This helps support the 32x2 broadcasts.
//
// Six records are produced:
//   r / rkz / rk : register source; unmasked, zero-masked, merge-masked.
//   m / mkz / mk : memory source;   unmasked, zero-masked, merge-masked.
//
// Every selection pattern builds the broadcast as DestInfo.VT and bitconverts
// it to MaskInfo.VT, which is the type of $dst and of the vselect.
//
//   Name            - Not referenced inside this multiclass.
//   SchedRR/SchedRM - Scheduling classes for the register / memory forms.
//   MaskInfo        - Supplies RC, KRCWM, VT and ImmAllZerosV for the result
//                     and the write-mask.
//   DestInfo        - Supplies the broadcast node's VT and the ExeDomain.
//   SrcInfo         - Supplies the source RC/VT, ScalarMemOp, BroadcastLdFrag
//                     and the EltSize used for EVEX_CD8.
//   IsConvertibleToThreeAddress
//                   - Copied to isConvertibleToThreeAddress on 'mk' only.
//   UnmaskedOp      - Operator matched by the unmasked register form 'r'.
//   UnmaskedBcastOp - Operator matched by the unmasked memory form 'm'.
//
// The masked forms always match X86VBroadcast / SrcInfo.BroadcastLdFrag,
// independent of the two Unmasked* arguments.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Register source, no masking.
  let hasSideEffects = 0 in
  def r : AVX512PI<
      opc, MRMSrcReg,
      (outs MaskInfo.RC:$dst),
      (ins SrcInfo.RC:$src),
      OpcodeStr # "\t{$src, $dst|$dst, $src}",
      [(set MaskInfo.RC:$dst,
            (MaskInfo.VT
              (bitconvert
                (DestInfo.VT (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
      DestInfo.ExeDomain>,
    T8PD, EVEX, Sched<[SchedRR]>;

  // Register source, zero-masking: unselected elements come from
  // MaskInfo.ImmAllZerosV.
  def rkz : AVX512PI<
      opc, MRMSrcReg,
      (outs MaskInfo.RC:$dst),
      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
      OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                  "${dst} {${mask}} {z}, $src}",
      [(set MaskInfo.RC:$dst,
            (vselect MaskInfo.KRCWM:$mask,
                     (MaskInfo.VT
                       (bitconvert
                         (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                     MaskInfo.ImmAllZerosV))],
      DestInfo.ExeDomain>,
    T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;

  // Register source, merge-masking: unselected elements come from $src0,
  // which is tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rk : AVX512PI<
      opc, MRMSrcReg,
      (outs MaskInfo.RC:$dst),
      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
      OpcodeStr # "\t{$src, ${dst} {${mask}}|" #
                  "${dst} {${mask}}, $src}",
      [(set MaskInfo.RC:$dst,
            (vselect MaskInfo.KRCWM:$mask,
                     (MaskInfo.VT
                       (bitconvert
                         (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                     MaskInfo.RC:$src0))],
      DestInfo.ExeDomain>,
    T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Memory source, no masking.
  let hasSideEffects = 0, mayLoad = 1 in
  def m : AVX512PI<
      opc, MRMSrcMem,
      (outs MaskInfo.RC:$dst),
      (ins SrcInfo.ScalarMemOp:$src),
      OpcodeStr # "\t{$src, $dst|$dst, $src}",
      [(set MaskInfo.RC:$dst,
            (MaskInfo.VT
              (bitconvert (DestInfo.VT (UnmaskedBcastOp addr:$src)))))],
      DestInfo.ExeDomain>,
    T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, zero-masking.
  def mkz : AVX512PI<
      opc, MRMSrcMem,
      (outs MaskInfo.RC:$dst),
      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
      OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                  "${dst} {${mask}} {z}, $src}",
      [(set MaskInfo.RC:$dst,
            (vselect MaskInfo.KRCWM:$mask,
                     (MaskInfo.VT
                       (bitconvert
                         (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                     MaskInfo.ImmAllZerosV))],
      DestInfo.ExeDomain>,
    T8PD, EVEX, EVEX_KZ, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, merge-masking. This is the only form that receives the
  // caller's IsConvertibleToThreeAddress setting.
  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def mk : AVX512PI<
      opc, MRMSrcMem,
      (outs MaskInfo.RC:$dst),
      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
      OpcodeStr # "\t{$src, ${dst} {${mask}}|" #
                  "${dst} {${mask}}, $src}",
      [(set MaskInfo.RC:$dst,
            (vselect MaskInfo.KRCWM:$mask,
                     (MaskInfo.VT
                       (bitconvert
                         (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                     MaskInfo.RC:$src0))],
      DestInfo.ExeDomain>,
    T8PD, EVEX, EVEX_K, EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
1209
// Convenience wrapper around avx512_broadcast_rm_split for the case in which
// the mask type and the broadcast result type are the same: DestInfo is
// passed for both the MaskInfo and the DestInfo parameter, and the two
// Unmasked* operators keep their defaults.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress>
    : avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                                /*MaskInfo=*/DestInfo, /*DestInfo=*/DestInfo,
                                SrcInfo, IsConvertibleToThreeAddress>;
1219
// Floating-point scalar broadcast with a 512-bit (Z, HasAVX512) and a 256-bit
// (Z256, HasVLX) destination; no Z128 variant is produced here.
// Each variant combines the instruction forms from avx512_broadcast_rm
// (source info _.info128, IsConvertibleToThreeAddress = 1) with the
// FRC-source selection patterns from avx512_broadcast_scalar.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                               WriteFShuffle256, WriteFShuffle256Ld,
                               _.info512, _.info128, 1>,
           avx512_broadcast_scalar<opc, OpcodeStr, NAME,
                                   _.info512, _.info128>,
           EVEX_V512;

  let Predicates = [HasVLX] in
  defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                                  WriteFShuffle256, WriteFShuffle256Ld,
                                  _.info256, _.info128, 1>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME,
                                      _.info256, _.info128>,
              EVEX_V256;
}
1238
// Floating-point scalar broadcast with 512-bit (Z, HasAVX512), 256-bit (Z256)
// and 128-bit (Z128) destinations; the latter two require HasVLX.
// Each variant combines the instruction forms from avx512_broadcast_rm
// (source info _.info128, IsConvertibleToThreeAddress = 1) with the
// FRC-source selection patterns from avx512_broadcast_scalar. All three
// widths use the WriteFShuffle256 / WriteFShuffle256Ld scheduling classes.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                               WriteFShuffle256, WriteFShuffle256Ld,
                               _.info512, _.info128, 1>,
           avx512_broadcast_scalar<opc, OpcodeStr, NAME,
                                   _.info512, _.info128>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                                    WriteFShuffle256, WriteFShuffle256Ld,
                                    _.info256, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME,
                                        _.info256, _.info128>,
                EVEX_V256;

    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                                    WriteFShuffle256, WriteFShuffle256Ld,
                                    _.info128, _.info128, 1>,
                avx512_broadcast_scalar<opc, OpcodeStr, NAME,
                                        _.info128, _.info128>,
                EVEX_V128;
  }
}
// vbroadcastss: opcode 0x18, f32 element info (Z, Z256 and Z128 variants).
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
// vbroadcastsd: opcode 0x19, f64 element info (Z and Z256 variants), VEX_W1X.
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>,
                    VEX_W1X;
1266
// Integer broadcast from a general-purpose register.
// Defines the maskable 'r' family through AVX512_maskable; the selection
// pattern is (_.VT (OpNode SrcRC:$src)).
//
//   SchedRR - Scheduling class attached to the instruction.
//   _       - Destination vector info; also supplies the mnemonic suffix and
//             the execution domain.
//   OpNode  - Operator matched for the broadcast.
//   SrcRC   - Register class of the source operand.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _,
                           (outs _.RC:$dst), (ins SrcRC:$src),
                           // Mnemonic is "vpbroadcast" followed by the
                           // element suffix; uses the same '#' paste
                           // spelling as the rest of this file.
                           "vpbroadcast" # _.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src))>,
           T8PD, EVEX, Sched<[SchedRR]>;
}
1277
// Integer broadcast from a general-purpose register for the byte/word forms.
//
// The instruction records are declared with a GR32 source operand and empty
// pattern lists. Selection is handled by the three anonymous patterns below
// (unmasked, merge-masked, zero-masked), which place the SrcRC value into a
// 32-bit register with INSERT_SUBREG on an IMPLICIT_DEF before passing it to
// the instruction.
//
//   Name    - Full record-name prefix; "r", "rk" and "rkz" are appended to
//             look up the instructions defined here.
//   SchedRR - Scheduling class attached to the instruction.
//   _       - Destination vector info.
//   OpNode  - Operator matched for the broadcast.
//   SrcRC   - Register class of the value being broadcast.
//   Subreg  - Sub-register index used to insert SrcRC into the i32 register.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<
               opc, MRMSrcReg,
               (outs _.RC:$dst), (ins GR32:$src),
               !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
               !con((ins _.KRCWM:$mask), (ins GR32:$src)),
               // Same '#' paste spelling as the rest of this file.
               "vpbroadcast" # _.Suffix, "$src", "$src", [], [], [],
               "$src0 = $dst">,
           T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked.
  def : Pat<(_.VT (OpNode SrcRC:$src)),
            (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked: unselected elements come from $src0.
  def : Pat<(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
            (!cast<Instruction>(Name#rk)
              _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked: unselected elements come from _.ImmAllZerosV.
  def : Pat<(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
            (!cast<Instruction>(Name#rkz)
              _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1301
// Instantiates avx512_int_broadcastbw_reg for all three vector lengths.
// Z requires 'prd'; Z256 and Z128 additionally require HasVLX. The record
// name handed to the inner multiclass is Name with the length suffix appended.
// Z and Z256 use WriteShuffle256, Z128 uses WriteShuffle.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256,
                                      _.info512, OpNode, SrcRC, Subreg>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>,
                EVEX_V256;

    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>,
                EVEX_V128;
  }
}
1315
// Instantiates avx512_int_broadcast_reg for all three vector lengths.
// Z requires 'prd'; Z256 and Z128 additionally require HasVLX.
// Z and Z256 use WriteShuffle256, Z128 uses WriteShuffle.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512,
                                    OpNode, SrcRC>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256,
                                         OpNode, SrcRC>,
                EVEX_V256;

    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128,
                                         OpNode, SrcRC>,
                EVEX_V128;
  }
}
1329
// Broadcasts from general-purpose registers.
// The byte and word forms (HasBWI) go through the GR32-based multiclass and
// pass the sub-register index used to widen GR8 / GR16 sources.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                                                   avx512vl_i8_info,
                                                   X86VBroadcast, GR8,
                                                   sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                                                   avx512vl_i16_info,
                                                   X86VBroadcast, GR16,
                                                   sub_16bit, HasBWI>;
// The dword and qword forms (HasAVX512) share opcode 0x7C; the qword form
// adds VEX_W.
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32,
                                                 HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64,
                                                 HasAVX512>,
                     VEX_W;
1339
// Instantiates avx512_broadcast_rm (integer broadcast from a vector register
// or from memory) for all three vector lengths. The source info is always
// _.info128. Z requires 'prd'; Z256 and Z128 additionally require HasVLX.
// Z and Z256 use WriteShuffle256 / WriteShuffle256Ld; Z128 uses
// WriteShuffle / WriteShuffleXLd.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                               WriteShuffle256, WriteShuffle256Ld,
                               _.info512, _.info128,
                               IsConvertibleToThreeAddress>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                                    WriteShuffle256, WriteShuffle256Ld,
                                    _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;

    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME,
                                    WriteShuffle, WriteShuffleXLd,
                                    _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}
1360
// Integer broadcasts from a vector register or memory.
// Byte/word forms require HasBWI and pass IsConvertibleToThreeAddress = 0;
// dword/qword forms require HasAVX512 and pass 1. The qword form adds VEX_W1X.
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>,
                    VEX_W1X;
1369
// Subvector broadcast from memory.
// Defines the maskable 'rm' family: a _Src.VT value loaded with _Src.LdFrag
// is broadcast with X86SubVBroadcast into a _Dst.VT result.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
                            OpcodeStr, "$src", "$src",
                            (_Dst.VT
                              (X86SubVBroadcast
                                (_Src.VT (_Src.LdFrag addr:$src))))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}
1379
// Subvector broadcast from memory for the AVX512DQ broadcast instructions.
// The unmasked pattern is disabled (null_frag) so that the DQ instructions
// are only selected when masking is requested; the masked forms match
// X86SubVBroadcast of a _Src.LdFrag load. Because the unmasked record has no
// pattern, hasSideEffects and mayLoad are set explicitly.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst,
                                  (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
                                  OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT
                                    (X86SubVBroadcast
                                      (_Src.VT (_Src.LdFrag addr:$src))))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}
1394
// Extra load-folding patterns for the 512-bit dword/qword broadcasts.
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;

  // FIXME: This is to handle aligned extloads from i8.
  def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZm addr:$src)>;
}
1404
// Extra load-folding patterns for the 128-/256-bit dword/qword broadcasts.
let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;

  // FIXME: This is to handle aligned extloads from i8.
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZ128m addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDZ256m addr:$src)>;
}
// Extra load-folding patterns for the 128-/256-bit word broadcast.
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                     (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                      (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                      (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;

  // FIXME: This is to handle aligned extloads from i8.
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZ256m addr:$src)>;
}
// Extra load-folding patterns for the 512-bit word broadcast.
let Predicates = [HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                      (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
                      (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;

  // FIXME: This is to handle aligned extloads from i8.
  def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWZm addr:$src)>;
}
1460
1461//===----------------------------------------------------------------------===//
1462// AVX-512 BROADCAST SUBVECTORS
1463//
1464
// 512-bit destinations with a 4 x 32-bit subvector source.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                  v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                  v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;

// 512-bit destinations with a 4 x 64-bit subvector source (VEX_W).
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                  v8i64_info, v4i64x_info>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                                  v8f64_info, v4f64x_info>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
1477
// Additional selection patterns for the 512-bit subvector broadcasts.
let Predicates = [HasAVX512] in {
  // 256-bit loads of the remaining element types reuse the 64X4 instructions.
  def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
            (VBROADCASTI64X4rm addr:$src)>;

  // Provide a fallback in case the load node that is used in the patterns
  // above is used by additional users, which prevents the pattern selection.
  // The register source is placed in sub_ymm of an IMPLICIT_DEF and then
  // inserted again with immediate 1.
  def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
            (VINSERTF64x4Zrr
              (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v4f64 VR256X:$src), 1)>;
  def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
            (VINSERTF64x4Zrr
              (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v8f32 VR256X:$src), 1)>;
  def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v4i64 VR256X:$src), 1)>;
  def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v8i32 VR256X:$src), 1)>;
  def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v16i16 VR256X:$src), 1)>;
  def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v32i8 VR256X:$src), 1)>;

  // 128-bit loads of the remaining element types reuse the 32X4 instructions.
  def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;

  // Patterns for selects of bitcasted operations: 16-element mask over a
  // broadcast built with 64-bit elements.
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32
                       (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     (v16f32 immAllZerosV)),
            (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32
                       (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32
                       (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     (v16i32 immAllZerosV)),
            (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32
                       (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  // Patterns for selects of bitcasted operations: 8-element mask over a
  // broadcast built with 32-bit elements.
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64
                       (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                     (v8f64 immAllZerosV)),
            (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64
                       (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64
                       (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                     (v8i64 immAllZerosV)),
            (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64
                       (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1553
// 256-bit subvector broadcasts (HasVLX) and their additional patterns.
let Predicates = [HasVLX] in {
  defm VBROADCASTI32X4Z256
      : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                   v8i32x_info, v4i32x_info>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;
  defm VBROADCASTF32X4Z256
      : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                   v8f32x_info, v4f32x_info>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // 128-bit loads of the remaining element types reuse the 32X4 instructions.
  def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;

  // Patterns for selects of bitcasted operations.
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f32
                       (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     (v8f32 immAllZerosV)),
            (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f32
                       (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i32
                       (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     (v8i32 immAllZerosV)),
            (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i32
                       (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;

  // Provide a fallback in case the load node that is used in the patterns
  // above is used by additional users, which prevents the pattern selection.
  // The register source is placed in sub_xmm of an IMPLICIT_DEF and then
  // inserted again with immediate 1.
  def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
            (VINSERTF32x4Z256rr
              (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v2f64 VR128X:$src), 1)>;
  def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
            (VINSERTF32x4Z256rr
              (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v4f32 VR128X:$src), 1)>;
  def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v2i64 VR128X:$src), 1)>;
  def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v4i32 VR128X:$src), 1)>;
  def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v8i16 VR128X:$src), 1)>;
  def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v16i8 VR128X:$src), 1)>;
}
1611
// 64x2 subvector broadcasts that need both VLX and DQ.
// Note: although the records carry a "Z128" suffix, they are instantiated
// with the 4 x 64-bit destination infos and EVEX_V256.
let Predicates = [HasVLX, HasDQI] in {
  defm VBROADCASTI64X2Z128
      : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                      v4i64x_info, v2i64x_info>,
        VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF64X2Z128
      : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                      v4f64x_info, v2f64x_info>,
        VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Patterns for selects of bitcasted operations.
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4f64
                       (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     (v4f64 immAllZerosV)),
            (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4f64
                       (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4i64
                       (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     (v4i64 immAllZerosV)),
            (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (bc_v4i64
                       (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     VR256X:$src0),
            (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1638
// 512-bit AVX512DQ subvector broadcasts (64x2 and 32x8) and their patterns.
let Predicates = [HasDQI] in {
  defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                       v8i64_info, v2i64x_info>,
                         VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                                       v16i32_info, v8i32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;
  defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                       v8f64_info, v2f64x_info>,
                         VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                                       v16f32_info, v8f32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;

  // Patterns for selects of bitcasted operations: 16-element mask over a
  // broadcast built with 64-bit elements.
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32
                       (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                     (v16f32 immAllZerosV)),
            (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16f32
                       (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32
                       (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                     (v16i32 immAllZerosV)),
            (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
                     (bc_v16i32
                       (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  // Patterns for selects of bitcasted operations: 8-element mask over a
  // broadcast built with 32-bit elements.
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64
                       (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     (v8f64 immAllZerosV)),
            (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8f64
                       (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64
                       (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     (v8i64 immAllZerosV)),
            (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (bc_v8i64
                       (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                     VR512:$src0),
            (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1688
// 32x2 broadcasts for 512-bit (Z, HasDQI) and 256-bit (Z256, HasDQI+HasVLX)
// destinations, built on avx512_broadcast_rm_split with:
//   MaskInfo = _Dst.infoN  (type of the result and of the write-mask)
//   DestInfo = _Src.infoN  (type of the broadcast node)
//   SrcInfo  = _Src.info128
// IsConvertibleToThreeAddress is 0 and both unmasked operators are null_frag,
// so the unmasked 'r' and 'm' forms get no selection pattern.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
  defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME,
                                     WriteShuffle256, WriteShuffle256Ld,
                                     _Dst.info512, _Src.info512, _Src.info128,
                                     0, null_frag, null_frag>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in
  defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME,
                                        WriteShuffle256, WriteShuffle256Ld,
                                        _Dst.info256, _Src.info256, _Src.info128,
                                        0, null_frag, null_frag>,
              EVEX_V256;
}
1702
// Integer flavour of the 32x2 broadcast: inherits the Z and Z256 variants
// from avx512_common_broadcast_32x2 and adds a 128-bit (Z128) variant under
// HasDQI + HasVLX, using WriteShuffle / WriteShuffleXLd.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src>
    : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  let Predicates = [HasDQI, HasVLX] in
  defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME,
                                        WriteShuffle, WriteShuffleXLd,
                                        _Dst.info128, _Src.info128, _Src.info128,
                                        0, null_frag, null_frag>,
              EVEX_V128;
}
1713
// The integer form is built from avx512_common_broadcast_i32x2 and therefore
// gets Z, Z256 and Z128 variants. The FP form uses
// avx512_common_broadcast_32x2 directly and so only gets Z and Z256.
// In both cases the destination info is the 32-bit element family and the
// source info is the 64-bit element family.
1714defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1715                                          avx512vl_i32_info, avx512vl_i64_info>;
1716defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1717                                          avx512vl_f32_info, avx512vl_f64_info>;
1718
1719//===----------------------------------------------------------------------===//
1720// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1721//---
// Defines the register form ("rr") of a mask-to-vector broadcast: the source
// is a mask register (KRC:$src), the destination is a vector register
// (_.RC:$dst), and the instruction is selected from X86VBroadcastm.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand string is appended here.
//   _         - vector type info supplying the destination class and VT.
//   KRC       - register class of the mask source operand.
// Only a register source form is defined; scheduling uses WriteShuffle.
1722multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1723                                  X86VectorVTInfo _, RegisterClass KRC> {
1724  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1725                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1726                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1727                  EVEX, Sched<[WriteShuffle]>;
1728}
1729
// Instantiates avx512_mask_broadcastm at all three vector widths.
//   VTInfo - VL type-info bundle; info512/info256/info128 pick the width.
//   KRC    - mask source register class, the same for every width.
// The 512-bit form (Z) requires HasCDI; the 256/128-bit forms additionally
// require HasVLX.
1730multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1731                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1732  let Predicates = [HasCDI] in
1733    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1734  let Predicates = [HasCDI, HasVLX] in {
1735    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1736    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1737  }
1738}
1739
// Mask broadcast instantiations: MW2D reads a VK16 mask and produces i32
// vectors; MB2Q reads a VK8 mask, produces i64 vectors and carries VEX_W.
1740defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1741                                               avx512vl_i32_info, VK16>;
1742defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1743                                               avx512vl_i64_info, VK8>, VEX_W;
1744
1745//===----------------------------------------------------------------------===//
1746// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of the VPERMI2 family.
//   opc/OpcodeStr - opcode byte and mnemonic.
//   sched         - scheduling class; the rm form uses sched.Folded and
//                   sched.ReadAfterFold.
//   _             - type info of the data operands and of the result.
//   IdxVT         - type info of the tied operand $src1.
// $src1 is tied to $dst and appears as the IdxVT-typed middle operand of
// X86VPermt2; $src2 and $src3 are the first and last operands.
// Both forms go through AVX512_maskable_3src_cast (defined outside this
// section), which presumably also emits the masked variants -- the rrk/rmk
// names are referenced by avx512_perm_i_lowering.
1747multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1748                         X86FoldableSchedWrite sched,
1749                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1750let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1751    hasSideEffects = 0 in {
1752  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1753          (ins _.RC:$src2, _.RC:$src3),
1754          OpcodeStr, "$src3, $src2", "$src2, $src3",
1755          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1756          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1757
  // Same operation with $src3 loaded from memory via _.LdFrag.
1758  let mayLoad = 1 in
1759  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1760            (ins _.RC:$src2, _.MemOp:$src3),
1761            OpcodeStr, "$src3, $src2", "$src2, $src3",
1762            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1763                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1764            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1765  }
1766}
1767
// Broadcast-memory (rmb) form of the VPERMI2 family: $src3 is a scalar memory
// operand loaded through _.BroadcastLdFrag, and the assembly operand string
// appends _.BroadcastStr to it. Parameters and the "$src1 = $dst" tie match
// avx512_perm_i; EVEX_B marks the broadcast encoding.
1768multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1769                            X86FoldableSchedWrite sched,
1770                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772      hasSideEffects = 0, mayLoad = 1 in
1773  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1774              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1775              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1776              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1777              (_.VT (X86VPermt2 _.RC:$src2,
1778               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1779              AVX5128IBase, EVEX_4V, EVEX_B,
1780              Sched<[sched.Folded, sched.ReadAfterFold]>;
1781}
1782
// Instantiates avx512_perm_i (rr/rm) together with avx512_perm_i_mb (rmb) at
// 512, 128 and 256 bits.
//   VTInfo      - VL type-info bundle for the data operands/result.
//   ShuffleMask - VL type-info bundle passed as IdxVT.
// The 512-bit defm sets no predicate here; the 128/256-bit defms require
// HasVLX. The same `sched` value is used for every width.
1783multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1784                               X86FoldableSchedWrite sched,
1785                               AVX512VLVectorVTInfo VTInfo,
1786                               AVX512VLVectorVTInfo ShuffleMask> {
1787  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1788                           ShuffleMask.info512>,
1789            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1790                             ShuffleMask.info512>, EVEX_V512;
1791  let Predicates = [HasVLX] in {
1792  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793                               ShuffleMask.info128>,
1794                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1795                                  ShuffleMask.info128>, EVEX_V128;
1796  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1797                               ShuffleMask.info256>,
1798                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1799                                  ShuffleMask.info256>, EVEX_V256;
1800  }
1801}
1802
// Variant of avx512_perm_i_sizes that instantiates only avx512_perm_i (rr/rm,
// no rmb broadcast form) and takes the gating feature as a parameter.
//   Idx - VL type-info bundle passed as IdxVT.
//   Prd - predicate required by every width; the 128/256-bit forms also
//         require HasVLX.
1803multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1804                                  X86FoldableSchedWrite sched,
1805                                  AVX512VLVectorVTInfo VTInfo,
1806                                  AVX512VLVectorVTInfo Idx,
1807                                  Predicate Prd> {
1808  let Predicates = [Prd] in
1809  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1810                           Idx.info512>, EVEX_V512;
1811  let Predicates = [Prd, HasVLX] in {
1812  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1813                               Idx.info128>, EVEX_V128;
1814  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1815                               Idx.info256>,  EVEX_V256;
1816  }
1817}
1818
// VPERMI2 instantiations. D/Q/PS/PD use avx512_perm_i_sizes (rr, rm and rmb
// forms); W and B use avx512_perm_i_sizes_bw (rr and rm only), gated on
// HasBWI and HasVBMI respectively. The FP forms pair an FP data type with the
// same-width integer type for the tied operand. The Q, W and PD forms carry
// VEX_W.
1819defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1820                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1821defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1822                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1823defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1824                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1825                  VEX_W, EVEX_CD8<16, CD8VF>;
1826defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1827                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1828                  EVEX_CD8<8, CD8VF>;
1829defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1830                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1831defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1832                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1833
1834// Extra patterns to deal with extra bitcasts due to passthru and index being
1835// different types on the fp versions.
//   InstrStr - instruction name prefix; "rrk", "rmk" and "rmbk" are appended
//              to pick the masked register, memory and broadcast forms.
//   _        - type info of the data operands and the vselect result.
//   IdxVT    - type the tied operand is bitcast to for X86VPermt2.
//   CastVT   - intermediate type $src1 is viewed as before the bitcasts.
// Each pattern matches a vselect whose false (passthru) value is the same
// $src1 register, bitcast to _.VT, that also feeds the permute as its
// IdxVT-typed operand.
1836multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1837                                  X86VectorVTInfo IdxVT,
1838                                  X86VectorVTInfo CastVT> {
  // Register source.
1839  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1840                             (X86VPermt2 (_.VT _.RC:$src2),
1841                                         (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1842                             (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1843            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1844                                                _.RC:$src2, _.RC:$src3)>;
  // Full-vector load folded into $src3.
1845  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1846                             (X86VPermt2 _.RC:$src2,
1847                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1848                                         (_.LdFrag addr:$src3)),
1849                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1850            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1851                                                _.RC:$src2, addr:$src3)>;
  // Broadcast load folded into $src3.
1852  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1853                             (X86VPermt2 _.RC:$src2,
1854                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1855                                         (_.BroadcastLdFrag addr:$src3)),
1856                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1857            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1858                                                 _.RC:$src2, addr:$src3)>;
1859}
1860
1861// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS forms are covered, each with the same-width vXi64 type as
// the intermediate cast type.
1862defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1863defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1864defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1865
1866// VPERMT2
// Register (rr) and full-vector memory (rm) forms of the VPERMT2 family.
// Unlike avx512_perm_i, the tied operand ($src1 = $dst) is the first operand
// of X86VPermt2 and has the data type _; the IdxVT-typed operand is the
// explicit input $src2. Built with AVX512_maskable_3src (no cast variant).
//   sched - scheduling class; the rm form uses sched.Folded and
//           sched.ReadAfterFold.
1867multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1868                         X86FoldableSchedWrite sched,
1869                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1870let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1871  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1872          (ins IdxVT.RC:$src2, _.RC:$src3),
1873          OpcodeStr, "$src3, $src2", "$src2, $src3",
1874          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1875          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1876
1877  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1878            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1879            OpcodeStr, "$src3, $src2", "$src2, $src3",
1880            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1881                   (_.LdFrag addr:$src3))), 1>,
1882            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1883  }
1884}
// Broadcast-memory (rmb) form of the VPERMT2 family: $src3 is a scalar memory
// operand loaded through _.BroadcastLdFrag, with _.BroadcastStr appended to it
// in the assembly string. Operand roles match avx512_perm_t; EVEX_B marks the
// broadcast encoding.
1885multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1886                            X86FoldableSchedWrite sched,
1887                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1888  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1889  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1890              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1891              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1892              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1893              (_.VT (X86VPermt2 _.RC:$src1,
1894               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1895              AVX5128IBase, EVEX_4V, EVEX_B,
1896              Sched<[sched.Folded, sched.ReadAfterFold]>;
1897}
1898
// Instantiates avx512_perm_t (rr/rm) together with avx512_perm_t_mb (rmb) at
// 512, 128 and 256 bits.
//   VTInfo      - VL type-info bundle for the data operands/result.
//   ShuffleMask - VL type-info bundle passed as IdxVT.
// The 512-bit defm sets no predicate here; the 128/256-bit defms require
// HasVLX.
1899multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1900                               X86FoldableSchedWrite sched,
1901                               AVX512VLVectorVTInfo VTInfo,
1902                               AVX512VLVectorVTInfo ShuffleMask> {
1903  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1904                              ShuffleMask.info512>,
1905            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1906                              ShuffleMask.info512>, EVEX_V512;
1907  let Predicates = [HasVLX] in {
1908  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909                              ShuffleMask.info128>,
1910                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1911                              ShuffleMask.info128>, EVEX_V128;
1912  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913                              ShuffleMask.info256>,
1914                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1915                              ShuffleMask.info256>, EVEX_V256;
1916  }
1917}
1918
// Variant of avx512_perm_t_sizes that instantiates only avx512_perm_t (rr/rm,
// no rmb broadcast form) and takes the gating feature as a parameter.
//   Idx - VL type-info bundle passed as IdxVT.
//   Prd - predicate required by every width; the 128/256-bit forms also
//         require HasVLX.
1919multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1920                                  X86FoldableSchedWrite sched,
1921                                  AVX512VLVectorVTInfo VTInfo,
1922                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1923  let Predicates = [Prd] in
1924  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1925                           Idx.info512>, EVEX_V512;
1926  let Predicates = [Prd, HasVLX] in {
1927  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1928                               Idx.info128>, EVEX_V128;
1929  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1930                               Idx.info256>, EVEX_V256;
1931  }
1932}
1933
// VPERMT2 instantiations, parallel to the VPERMI2 ones: D/Q/PS/PD use
// avx512_perm_t_sizes (rr, rm and rmb forms); W and B use
// avx512_perm_t_sizes_bw (rr and rm only), gated on HasBWI and HasVBMI
// respectively. The FP forms use the same-width integer info as IdxVT.
1934defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1935                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1936defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1937                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1938defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1939                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1940                  VEX_W, EVEX_CD8<16, CD8VF>;
1941defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1942                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1943                  EVEX_CD8<8, CD8VF>;
1944defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1945                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1946defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1947                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1948
1949//===----------------------------------------------------------------------===//
1950// AVX-512 - BLEND using mask
1951//
1952
// Register and full-vector memory forms of the mask-blend instructions
// (instantiated by blendmask_dq and blendmask_bw).
// NOTE(review): the multiclass name resembles a scheduling-class identifier,
// but this is an instruction multiclass; the name is kept because other
// definitions reference it.
//   opc/OpcodeStr - opcode byte and mnemonic.
//   sched         - scheduling class; memory forms use sched.Folded and
//                   sched.ReadAfterFold.
//   _             - vector type info (register class, mask class, mem operand).
// Six records are defined: rr/rm (no mask), rrk/rmk (EVEX_K) and rrkz/rmkz
// (EVEX_KZ). All have empty selection patterns, so hasSideEffects/mayLoad are
// set explicitly. The zero-masking forms are marked NotMemoryFoldable.
1953multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1954                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1955  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1956  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1957             (ins _.RC:$src1, _.RC:$src2),
1958             !strconcat(OpcodeStr,
1959             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1960             EVEX_4V, Sched<[sched]>;
1961  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1962             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1963             !strconcat(OpcodeStr,
1964             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1966  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1967             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1968             !strconcat(OpcodeStr,
1969             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1970             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  // Memory-source forms; these also carry EVEX_CD8<_.EltSize, CD8VF>.
1971  let mayLoad = 1 in {
1972  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1973             (ins _.RC:$src1, _.MemOp:$src2),
1974             !strconcat(OpcodeStr,
1975             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1976             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1977             Sched<[sched.Folded, sched.ReadAfterFold]>;
1978  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1979             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1980             !strconcat(OpcodeStr,
1981             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1982             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1983             Sched<[sched.Folded, sched.ReadAfterFold]>;
1984  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1985             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1986             !strconcat(OpcodeStr,
1987             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1988             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1989             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1990  }
1991  }
1992}
// Broadcast-memory forms of the mask-blend instructions: rmbk (EVEX_K),
// rmbkz (EVEX_KZ, NotMemoryFoldable) and rmb (no mask). $src2 is a scalar
// memory operand and _.BroadcastStr is appended to it in the assembly string.
// All three have empty selection patterns, carry EVEX_B, and are declared
// mayLoad with no side effects. Parameters match WriteFVarBlendask.
1993multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1994                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1995  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1996  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1998       !strconcat(OpcodeStr,
1999            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2000            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2001      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2002      Sched<[sched.Folded, sched.ReadAfterFold]>;
2003
2004  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2006       !strconcat(OpcodeStr,
2007            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2008            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2009      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2010      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2011
2012  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2013      (ins _.RC:$src1, _.ScalarMemOp:$src2),
2014       !strconcat(OpcodeStr,
2015            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2016            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2017      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2018      Sched<[sched.Folded, sched.ReadAfterFold]>;
2019  }
2020}
2021
// Mask-blend instantiation that includes broadcast forms: combines
// WriteFVarBlendask with WriteFVarBlendask_rmb at each width.
//   sched  - per-width scheduling bundle; .ZMM/.YMM/.XMM select the class.
//   VTInfo - VL type-info bundle.
// The 512-bit defm sets no predicate here; 256/128-bit forms require HasVLX.
2022multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2023                        AVX512VLVectorVTInfo VTInfo> {
2024  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2026                                 EVEX_V512;
2027
2028  let Predicates = [HasVLX] in {
2029    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2030                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2031                                      EVEX_V256;
2032    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2033                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2034                                      EVEX_V128;
2035  }
2036}
2037
// Mask-blend instantiation without broadcast forms: uses only
// WriteFVarBlendask at each width. The 512-bit form requires HasBWI; the
// 256/128-bit forms require HasBWI and HasVLX. Parameters match blendmask_dq.
2038multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2039                        AVX512VLVectorVTInfo VTInfo> {
2040  let Predicates = [HasBWI] in
2041    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2042                               EVEX_V512;
2043
2044  let Predicates = [HasBWI, HasVLX] in {
2045    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2046                                  EVEX_V256;
2047    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2048                                  EVEX_V128;
2049  }
2050}
2051
// Mask-blend instantiations. The 32/64-bit element forms (PS/PD/D/Q) use
// blendmask_dq and therefore get broadcast variants; the byte/word forms use
// blendmask_bw. FP forms use SchedWriteFVarBlend, integer forms
// SchedWriteVarBlend. The PD, Q and W forms carry VEX_W.
2052defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2053                              avx512vl_f32_info>;
2054defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2055                              avx512vl_f64_info>, VEX_W;
2056defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2057                              avx512vl_i32_info>;
2058defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2059                              avx512vl_i64_info>, VEX_W;
2060defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2061                              avx512vl_i8_info>;
2062defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2063                              avx512vl_i16_info>, VEX_W;
2064
2065//===----------------------------------------------------------------------===//
2066// Compare Instructions
2067//===----------------------------------------------------------------------===//
2068
2069// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2070
// Scalar FP compare into a mask register (opcode 0xC2, mnemonic
// "vcmp" # _.Suffix, condition code as an 8-bit immediate).
//   _            - scalar type info (register classes, VT, memory operands).
//   OpNode       - compare node used by the regular forms.
//   OpNodeSAE    - compare node used by the {sae} form.
//   OpNode_su /
//   OpNodeSAE_su - alternate fragments passed as the second pattern of
//                  AVX512_maskable_cmp (presumably used for the masked
//                  variants -- confirm against AVX512_maskable_cmp).
//   sched        - scheduling class; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
// Defines the vector-register forms rr_Int / rm_Int / rrb_Int and the
// isCodeGenOnly forms rr / rm operating on _.FRC.
2071multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2072                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2073                             X86FoldableSchedWrite sched> {
2074  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2075                      (outs _.KRC:$dst),
2076                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2077                      "vcmp"#_.Suffix,
2078                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2079                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2080                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2081                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2082  let mayLoad = 1 in
2083  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2084                    (outs _.KRC:$dst),
2085                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2086                    "vcmp"#_.Suffix,
2087                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2088                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2089                        timm:$cc),
2090                    (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2091                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2092                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2093
  // {sae} form: uses the SAE nodes, sets EVEX_B, and declares only a read of
  // MXCSR instead of the SIMD_EXC mixin used by the other forms.
2094  let Uses = [MXCSR] in
2095  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2096                     (outs _.KRC:$dst),
2097                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2098                     "vcmp"#_.Suffix,
2099                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2100                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2101                                timm:$cc),
2102                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2103                                   timm:$cc)>,
2104                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2105
  // Codegen-only forms on the scalar FP register class; only rr is marked
  // commutable.
2106  let isCodeGenOnly = 1 in {
2107    let isCommutable = 1 in
2108    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2109                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2110                !strconcat("vcmp", _.Suffix,
2111                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2112                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2113                                          _.FRC:$src2,
2114                                          timm:$cc))]>,
2115                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2116    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2117              (outs _.KRC:$dst),
2118              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2119              !strconcat("vcmp", _.Suffix,
2120                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2121              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2122                                        (_.ScalarLdFrag addr:$src2),
2123                                        timm:$cc))]>,
2124              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2125              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2126  }
2127}
2128
// Single-use variants of X86cmpms and X86cmpmsSAE: each fragment matches the
// underlying node only when that node has exactly one use (N->hasOneUse()).
// They are passed to avx512_cmp_scalar as OpNode_su / OpNodeSAE_su.
2129def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2130                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
2131  return N->hasOneUse();
2132}]>;
2133def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2134                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2135  return N->hasOneUse();
2136}]>;
2137
// Scalar compare instantiations, both gated on HasAVX512. The single- and
// double-precision forms share the same nodes and scheduling class and differ
// in type info, execution domain and encoding mixins (AVX512XSIi8Base vs.
// AVX512XDIi8Base plus VEX_W).
2138let Predicates = [HasAVX512] in {
2139  let ExeDomain = SSEPackedSingle in
2140  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2141                                   X86cmpms_su, X86cmpmsSAE_su,
2142                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2143  let ExeDomain = SSEPackedDouble in
2144  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2145                                   X86cmpms_su, X86cmpmsSAE_su,
2146                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2147}
2148
// Packed integer compare into a mask register: register (rr), memory (rm) and
// their write-masked counterparts (rrk, rmk).
//   opc/OpcodeStr - opcode byte and mnemonic.
//   sched         - scheduling class; memory forms use sched.Folded and
//                   sched.ReadAfterFold.
//   _             - vector type info (source class, mask classes, mem operand).
//   IsCommutable  - applied as isCommutable to the register forms only.
// All four records have empty selection patterns, so hasSideEffects and
// mayLoad are set explicitly.
2149multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2150                              X86FoldableSchedWrite sched,
2151                              X86VectorVTInfo _, bit IsCommutable> {
2152  let isCommutable = IsCommutable, hasSideEffects = 0 in
2153  def rr : AVX512BI<opc, MRMSrcReg,
2154             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2155             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2156             []>, EVEX_4V, Sched<[sched]>;
2157  let mayLoad = 1, hasSideEffects = 0 in
2158  def rm : AVX512BI<opc, MRMSrcMem,
2159             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2160             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2161             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2162  let isCommutable = IsCommutable, hasSideEffects = 0 in
2163  def rrk : AVX512BI<opc, MRMSrcReg,
2164              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2165              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2166                          "$dst {${mask}}, $src1, $src2}"),
2167              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2168  let mayLoad = 1, hasSideEffects = 0 in
2169  def rmk : AVX512BI<opc, MRMSrcMem,
2170              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2171              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2172                          "$dst {${mask}}, $src1, $src2}"),
2173              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2174}
2175
// Extends avx512_icmp_packed (rr/rm/rrk/rmk) with broadcast-memory forms:
// rmb (no mask) and rmbk (EVEX_K). $src2 is a scalar memory operand with
// _.BroadcastStr appended in the assembly string; both records carry EVEX_B
// and have empty selection patterns. Parameters are forwarded to the base
// multiclass unchanged.
2176multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2177                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2178                                  bit IsCommutable> :
2179           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2180  let mayLoad = 1, hasSideEffects = 0 in {
2181  def rmb : AVX512BI<opc, MRMSrcMem,
2182              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2183              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2184                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2185              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2186  def rmbk : AVX512BI<opc, MRMSrcMem,
2187               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2188                                       _.ScalarMemOp:$src2),
2189               !strconcat(OpcodeStr,
2190                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2191                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2192               []>, EVEX_4V, EVEX_K, EVEX_B,
2193               Sched<[sched.Folded, sched.ReadAfterFold]>;
2194  }
2195}
2196
// Instantiates avx512_icmp_packed (no broadcast forms) at 512, 256 and 128
// bits.
//   sched        - per-width scheduling bundle (.ZMM/.YMM/.XMM).
//   VTInfo       - VL type-info bundle.
//   prd          - predicate required by every width; 256/128-bit forms also
//                  require HasVLX.
//   IsCommutable - forwarded to avx512_icmp_packed; defaults to 0.
2197multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2198                                 X86SchedWriteWidths sched,
2199                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2200                                 bit IsCommutable = 0> {
2201  let Predicates = [prd] in
2202  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2203                              VTInfo.info512, IsCommutable>, EVEX_V512;
2204
2205  let Predicates = [prd, HasVLX] in {
2206    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2207                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2208    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2209                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2210  }
2211}
2212
// Same as avx512_icmp_packed_vl but instantiates avx512_icmp_packed_rmb, so
// each width also gets the rmb/rmbk broadcast forms. Parameters and predicate
// handling are identical.
2213multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2214                                     X86SchedWriteWidths sched,
2215                                     AVX512VLVectorVTInfo VTInfo,
2216                                     Predicate prd, bit IsCommutable = 0> {
2217  let Predicates = [prd] in
2218  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2219                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2220
2221  let Predicates = [prd, HasVLX] in {
2222    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2223                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2224    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2225                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2226  }
2227}
2228
2229// This node treats ISD::SETCC as commutable to help match loads in both
2230// operands for PCMPEQ.
2231def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Matches a setcc with the SETGT condition code; built on the plain
// (non-commutable) setcc node.
2232def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2233                         (setcc node:$src1, node:$src2, SETGT)>;
2234
2235// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2236// increase the pattern complexity the way an immediate would.
// Byte/word forms use avx512_icmp_packed_vl (gated on HasBWI, VEX_WIG, no
// broadcast variants); dword/qword forms use avx512_icmp_packed_rmb_vl (gated
// on HasAVX512, with broadcast variants). The EQ forms pass IsCommutable = 1;
// the GT forms rely on the default of 0. The qword forms additionally use the
// T8PD mixin and VEX_W.
2237let AddedComplexity = 2 in {
2238// FIXME: Is there a better scheduler class for VPCMP?
2239defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2240                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2241                EVEX_CD8<8, CD8VF>, VEX_WIG;
2242
2243defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2244                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2245                EVEX_CD8<16, CD8VF>, VEX_WIG;
2246
2247defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2248                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2249                EVEX_CD8<32, CD8VF>;
2250
2251defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2252                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2253                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2254
2255defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2256                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2257                EVEX_CD8<8, CD8VF>, VEX_WIG;
2258
2259defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2260                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2261                EVEX_CD8<16, CD8VF>, VEX_WIG;
2262
2263defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2264                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2265                EVEX_CD8<32, CD8VF>;
2266
2267defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2268                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2269                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2270}
2271
// Integer vector compare into a mask register, with the comparison selected by
// an 8-bit immediate operand ("vpcmp" # Suffix). Emits register/register and
// register/full-vector-load forms, each with and without a write-mask.
//
//   Frag, Frag_su         - compare fragments used by the unmasked / masked
//                           instruction patterns.
//   CommFrag, CommFrag_su - fragments used by the extra patterns at the end,
//                           where the load is the first compare operand; the
//                           fragment's OperandTransform is applied to $cc.
//   Name                  - defm prefix used to look up the rmi/rmik records.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  // Register/register, unmasked.
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  // Register/memory (load as second operand), unmasked.
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Register/register, result ANDed with $mask.
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  // Register/memory, result ANDed with $mask.
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Load in the first compare operand: select the memory form with the
  // register as $src1 and let the commuting fragment rewrite the immediate.
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Masked variant. The transform is taken from the fragment that is actually
  // matched (CommFrag_su), consistent with avx512_icmp_cc_rmb's masked pattern.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag_su.OperandTransform $cc))>;
}
2333
// Everything avx512_icmp_cc provides, plus the embedded-broadcast memory forms
// (rmib / rmibk) and the patterns that accept a broadcast load as the first
// compare operand by applying the commuting fragment's OperandTransform to $cc.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name>
    : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su, sched, _,
                     Name> {
  // Unmasked compare against a broadcast scalar load.
  def rmib
      : AVX512AIi8<opc, MRMSrcMem,
                   (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                   "vpcmp" # Suffix #
                       "\t{$cc, ${src2}" # _.BroadcastStr # ", $src1, $dst|" #
                       "$dst, $src1, ${src2}" # _.BroadcastStr # ", $cc}",
                   [(set _.KRC:$dst,
                         (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                          (_.BroadcastLdFrag addr:$src2),
                                          cond)))]>,
        EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Same compare with the result ANDed with $mask.
  def rmibk
      : AVX512AIi8<opc, MRMSrcMem,
                   (outs _.KRC:$dst),
                   (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2,
                        u8imm:$cc),
                   "vpcmp" # Suffix #
                       "\t{$cc, ${src2}" # _.BroadcastStr #
                       ", $src1, $dst {${mask}}|" #
                       "$dst {${mask}}, $src1, ${src2}" # _.BroadcastStr #
                       ", $cc}",
                   [(set _.KRC:$dst,
                         (and _.KRCWM:$mask,
                              (_.KVT (Frag_su:$cc
                                         (_.VT _.RC:$src1),
                                         (_.BroadcastLdFrag addr:$src2),
                                         cond))))]>,
        EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast load as the first compare operand, unmasked.
  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
                                 (_.VT _.RC:$src1),
                                 cond)),
            (!cast<Instruction>(Name # _.ZSuffix # "rmib")
                _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Broadcast load as the first compare operand, masked.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                         (_.VT _.RC:$src1),
                                         cond))),
            (!cast<Instruction>(Name # _.ZSuffix # "rmibk")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                (CommFrag_su.OperandTransform $cc))>;
}
2376
// Instantiates avx512_icmp_cc at all three vector lengths. The 512-bit form is
// guarded by prd alone; the 256- and 128-bit forms additionally need HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                            sched.ZMM, VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag,
                               CommFrag_su, sched.YMM, VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag,
                               CommFrag_su, sched.XMM, VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
2392
// Instantiates avx512_icmp_cc_rmb (the variant with broadcast memory forms) at
// all three vector lengths. The 512-bit form is guarded by prd alone; the
// 256- and 128-bit forms additionally need HasVLX.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag,
                                CommFrag_su, sched.ZMM, VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag,
                                   CommFrag_su, sched.YMM, VTInfo.info256,
                                   NAME>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag,
                                   CommFrag_su, sched.XMM, VTInfo.info128,
                                   NAME>,
                EVEX_V128;
  }
}
2408
// Turns the condition code of a setcc node (its operand 2) into the i8
// immediate computed by X86::getVPCMPImmForCond.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  const uint8_t Imm = X86::getVPCMPImmForCond(CondNode->get());
  return getI8Imm(Imm, SDLoc(N));
}]>;
2414
// Swapped-operand counterpart of X86pcmpm_imm: the immediate obtained from the
// setcc condition code is additionally passed through
// X86::getSwappedVPCMPImm.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  const uint8_t Imm = X86::getVPCMPImmForCond(CondNode->get());
  const uint8_t SwappedImm = X86::getSwappedVPCMPImm(Imm);
  return getI8Imm(SwappedImm, SDLoc(N));
}]>;
2422
// setcc whose condition code is rejected by ISD::isUnsignedIntSetCC; the
// immediate is produced by X86pcmpm_imm.
def X86pcmpm : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return !ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm>;
2428
// Single-use variant of X86pcmpm: additionally requires N->hasOneUse().
def X86pcmpm_su : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm>;
2434
// Same match as X86pcmpm, but the immediate is produced by
// X86pcmpm_imm_commute. Use for load folding.
def X86pcmpm_commute : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return !ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm_commute>;
2441
// Single-use variant of X86pcmpm_commute: additionally requires
// N->hasOneUse().
def X86pcmpm_commute_su : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm_commute>;
2447
// setcc whose condition code is accepted by ISD::isUnsignedIntSetCC; the
// immediate is produced by X86pcmpm_imm.
def X86pcmpum : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm>;
2453
// Single-use variant of X86pcmpum: additionally requires N->hasOneUse().
def X86pcmpum_su : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return N->hasOneUse() && ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm>;
2459
// Same match as X86pcmpum, but the immediate is produced by
// X86pcmpm_imm_commute. Use for load folding.
def X86pcmpum_commute : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm_commute>;
2466
// Single-use variant of X86pcmpum_commute: additionally requires
// N->hasOneUse().
def X86pcmpum_commute_su : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (setcc node:$src1, node:$src2, node:$cc),
    [{
      const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
      return N->hasOneUse() && ISD::isUnsignedIntSetCC(CondNode->get());
    }],
    X86pcmpm_imm_commute>;
2472
// Immediate-controlled integer compares. The VPCMP<sz> records use the
// X86pcmpm fragment family and the VPCMPU<sz> records use the X86pcmpum
// family. Byte/word forms are built without broadcast memory forms and are
// predicated on HasBWI; dword/qword forms include broadcast memory forms and
// are predicated on HasAVX512.
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?

// Byte elements.
defm VPCMPB
    : avx512_icmp_cc_vl<0x3F, "b",
                        X86pcmpm, X86pcmpm_su,
                        X86pcmpm_commute, X86pcmpm_commute_su,
                        SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
      EVEX_CD8<8, CD8VF>;
defm VPCMPUB
    : avx512_icmp_cc_vl<0x3E, "ub",
                        X86pcmpum, X86pcmpum_su,
                        X86pcmpum_commute, X86pcmpum_commute_su,
                        SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
      EVEX_CD8<8, CD8VF>;

// Word elements.
defm VPCMPW
    : avx512_icmp_cc_vl<0x3F, "w",
                        X86pcmpm, X86pcmpm_su,
                        X86pcmpm_commute, X86pcmpm_commute_su,
                        SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
      VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW
    : avx512_icmp_cc_vl<0x3E, "uw",
                        X86pcmpum, X86pcmpum_su,
                        X86pcmpum_commute, X86pcmpum_commute_su,
                        SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
      VEX_W, EVEX_CD8<16, CD8VF>;

// Doubleword elements.
defm VPCMPD
    : avx512_icmp_cc_rmb_vl<0x1F, "d",
                            X86pcmpm, X86pcmpm_su,
                            X86pcmpm_commute, X86pcmpm_commute_su,
                            SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
      EVEX_CD8<32, CD8VF>;
defm VPCMPUD
    : avx512_icmp_cc_rmb_vl<0x1E, "ud",
                            X86pcmpum, X86pcmpum_su,
                            X86pcmpum_commute, X86pcmpum_commute_su,
                            SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
      EVEX_CD8<32, CD8VF>;

// Quadword elements.
defm VPCMPQ
    : avx512_icmp_cc_rmb_vl<0x1F, "q",
                            X86pcmpm, X86pcmpm_su,
                            X86pcmpm_commute, X86pcmpm_commute_su,
                            SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
      VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ
    : avx512_icmp_cc_rmb_vl<0x1E, "uq",
                            X86pcmpum, X86pcmpum_su,
                            X86pcmpum_commute, X86pcmpum_commute_su,
                            SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
      VEX_W, EVEX_CD8<64, CD8VF>;
2509
// X86cmpm restricted to nodes with a single use.
def X86cmpm_su : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (X86cmpm node:$src1, node:$src2, node:$cc),
    [{ return N->hasOneUse(); }]>;
// X86cmpmSAE restricted to nodes with a single use.
def X86cmpmSAE_su : PatFrag<
    (ops node:$src1, node:$src2, node:$cc),
    (X86cmpmSAE node:$src1, node:$src2, node:$cc),
    [{ return N->hasOneUse(); }]>;
2518
// Rewrites a VCMP target immediate for swapped operands: the low five bits of
// the immediate are passed through X86::getSwappedVCMPImm and returned as i8.
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  const uint8_t SwappedImm =
      X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(SwappedImm, SDLoc(N));
}]>;
2523
// Packed FP compare into a mask register (opcode 0xC2) for one vector type:
// register, full-vector-load and broadcast-load forms built through
// AVX512_maskable_cmp, followed by patterns that select the memory forms when
// the load is the first compare operand (the immediate goes through
// X86cmpm_imm_commute in that case). Name is the defm prefix used to look up
// the generated instruction records.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  let Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Register/register; the trailing 1 is AVX512_maskable_cmp's final
    // positional argument.
    defm rri : AVX512_maskable_cmp<
                   0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                   "vcmp" # _.Suffix,
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               timm:$cc),
                   1>,
               Sched<[sched]>;

    // Register/full-vector load.
    defm rmi : AVX512_maskable_cmp<
                   0xC2, MRMSrcMem, _,
                   (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                   "vcmp" # _.Suffix,
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86any_cmpm (_.VT _.RC:$src1),
                                (_.VT (_.LdFrag addr:$src2)),
                                timm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1),
                               (_.VT (_.LdFrag addr:$src2)),
                               timm:$cc)>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Register/broadcast scalar load.
    defm rmbi : AVX512_maskable_cmp<
                    0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                    "vcmp" # _.Suffix,
                    "$cc, ${src2}" # _.BroadcastStr # ", $src1",
                    "$src1, ${src2}" # _.BroadcastStr # ", $cc",
                    (X86any_cmpm (_.VT _.RC:$src1),
                                 (_.VT (_.BroadcastLdFrag addr:$src2)),
                                 timm:$cc),
                    (X86cmpm_su (_.VT _.RC:$src1),
                                (_.VT (_.BroadcastLdFrag addr:$src2)),
                                timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand.

  // Full-vector load first, unmasked.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name # _.ZSuffix # "rmi")
                _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  // Full-vector load first, masked.
  def : Pat<(and _.KRCWM:$mask,
                 (X86cmpm_su (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                             timm:$cc)),
            (!cast<Instruction>(Name # _.ZSuffix # "rmik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                (X86cmpm_imm_commute timm:$cc))>;

  // Broadcast load first, unmasked.
  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name # _.ZSuffix # "rmbi")
                _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  // Broadcast load first, masked.
  def : Pat<(and _.KRCWM:$mask,
                 (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                             (_.VT _.RC:$src1), timm:$cc)),
            (!cast<Instruction>(Name # _.ZSuffix # "rmbik")
                _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                (X86cmpm_imm_commute timm:$cc))>;
}
2585
// {sae} register/register form of the packed FP compare, selected from
// X86cmpmSAE (X86cmpmSAE_su for the masked pattern). The comparison is given
// by the $cc immediate (VCMP[EQ/LT/LE/...]).
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Uses = [MXCSR] in {
    defm rrib : AVX512_maskable_cmp<
                    0xC2, MRMSrcReg, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                    "vcmp" # _.Suffix,
                    "$cc, {sae}, $src2, $src1",
                    "$src1, $src2, {sae}, $cc",
                    (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                    (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   timm:$cc)>,
                EVEX_B, Sched<[sched]>;
  }
}
2599
// Packed FP compare at all vector lengths. Only the 512-bit instantiation also
// pulls in the {sae} form; the 128- and 256-bit ones additionally require
// HasVLX.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
2611
// Packed double- and single-precision compares into a mask register.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2616
// Patterns to select scalar fp compares with the load as first operand: the
// memory form is used with the register as $src1 and the immediate is
// rewritten by X86cmpm_imm_commute.
let Predicates = [HasAVX512] in {
  // f64
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2,
                       (X86cmpm_imm_commute timm:$cc))>;

  // f32
  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2,
                       (X86cmpm_imm_commute timm:$cc))>;
}
2627
2628// ----------------------------------------------------------------
2629// FPClass
2630
// X86Vfpclasss restricted to nodes with a single use.
def X86Vfpclasss_su : PatFrag<
    (ops node:$src1, node:$src2),
    (X86Vfpclasss node:$src1, node:$src2),
    [{ return N->hasOneUse(); }]>;
2635
// X86Vfpclass restricted to nodes with a single use.
def X86Vfpclass_su : PatFrag<
    (ops node:$src1, node:$src2),
    (X86Vfpclass node:$src1, node:$src2),
    [{ return N->hasOneUse(); }]>;
2640
// Scalar fpclass:  mask = op(reg_scalar, imm)
//                  mask = op(mem_scalar, imm)
// Each form also has a write-masked variant (rrk / rmk) whose pattern ANDs the
// single-use fragment result with $mask.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    // Register source.
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr # _.Suffix #
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.VT _.RC:$src1),
                                        (i32 timm:$src2)))]>,
             Sched<[sched]>;

    // Register source, write-masked.
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr # _.Suffix #
                         "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclasss_su (_.VT _.RC:$src1),
                                                 (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;

    // Memory source.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr # _.Suffix #
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss _.ScalarIntMemCPat:$src1,
                                        (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Memory source, write-masked.
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1,
                          i32u8imm:$src2),
                     OpcodeStr # _.Suffix #
                         "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
                                                 (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2679
// Vector fpclass:  mask = fpclass(reg_vec, imm)
//                  mask = fpclass(mem_vec, imm)
//                  mask = fpclass(broadcast(eltVt), imm)
// Each form also has a write-masked variant. `mem` is the suffix appended to
// the mnemonic: the full-vector memory forms print it as "{" # mem # "}", and
// the aliases at the end accept it on the register and broadcast forms.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    // Register source.
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr # _.Suffix #
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclass (_.VT _.RC:$src1),
                                       (i32 timm:$src2)))]>,
             Sched<[sched]>;

    // Register source, write-masked.
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr # _.Suffix #
                         "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclass_su (_.VT _.RC:$src1),
                                                (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;

    // Full-vector memory source.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr # _.Suffix # "{" # mem # "}" #
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclass (_.VT (_.LdFrag addr:$src1)),
                                       (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Full-vector memory source, write-masked.
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr # _.Suffix # "{" # mem # "}" #
                         "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (X86Vfpclass_su (_.VT (_.LdFrag addr:$src1)),
                                                (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Broadcast memory source.
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr # _.Suffix # "\t{$src2, ${src1}" #
                         _.BroadcastStr # ", $dst|$dst, ${src1}" #
                         _.BroadcastStr # ", $src2}",
                     [(set _.KRC:$dst,
                           (X86Vfpclass
                               (_.VT (_.BroadcastLdFrag addr:$src1)),
                               (i32 timm:$src2)))]>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Broadcast memory source, write-masked.
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1,
                           i32u8imm:$src2),
                      OpcodeStr # _.Suffix # "\t{$src2, ${src1}" #
                          _.BroadcastStr #
                          ", $dst {${mask}}|$dst {${mask}}, ${src1}" #
                          _.BroadcastStr # ", $src2}",
                      [(set _.KRC:$dst,
                            (and _.KRCWM:$mask,
                                 (X86Vfpclass_su
                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
                                     (i32 timm:$src2))))]>,
               EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr # _.Suffix # mem #
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME # "rr")
                      _.KRC:$dst, _.RC:$src1, i32u8imm:$src2),
                  0, "att">;

  def : InstAlias<OpcodeStr # _.Suffix # mem #
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME # "rrk")
                      _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                  0, "att">;

  def : InstAlias<OpcodeStr # _.Suffix # mem #
                      "\t{$src2, ${src1}" # _.BroadcastStr #
                      ", $dst|$dst, ${src1}" # _.BroadcastStr # ", $src2}",
                  (!cast<Instruction>(NAME # "rmb")
                      _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2),
                  0, "att">;

  def : InstAlias<OpcodeStr # _.Suffix # mem #
                      "\t{$src2, ${src1}" # _.BroadcastStr #
                      ", $dst {${mask}}|" #
                      "$dst {${mask}}, ${src1}" # _.BroadcastStr # ", $src2}",
                  (!cast<Instruction>(NAME # "rmbk")
                      _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1,
                      i32u8imm:$src2),
                  0, "att">;
}
2758
// Instantiates avx512_vector_fpclass at all three vector lengths, passing
// "z" / "x" / "y" as the mnemonic suffix for the 512- / 128- / 256-bit forms.
// The 128- and 256-bit forms additionally require HasVLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, _.info512, "z">,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, _.info128,
                                      "x">,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, _.info256,
                                      "y">,
                EVEX_V256;
  }
}
2773
// All fpclass flavours for one mnemonic: packed single/double (opcVec) and
// scalar single/double (opcScalar).
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                 Predicate prd> {
  // Packed forms.
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, prd>,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, prd>,
            EVEX_CD8<64, CD8VF>, VEX_W;

  // Scalar forms.
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, sched.Scl, f32x_info,
                                   prd>,
             VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, sched.Scl, f64x_info,
                                   prd>,
             VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
}
2790
// vfpclass{ps,pd,ss,sd}: vector opcode 0x66, scalar opcode 0x67, HasDQI.
defm VFPCLASS
    : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp, HasDQI>,
      AVX512AIi8Base, EVEX;
2793
2794//-----------------------------------------------------------------
2795// Mask register copy, including
2796// - copy between mask registers
2797// - load/store mask registers
2798// - copy from GPR to mask register and vice versa
2799//
// Mask-register moves for one mask width:
//   kk - mask register to mask register (no selection pattern)
//   km - load a mask register from memory
//   mk - store a mask register to memory
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
                           X86MemOperand x86memop> {
  // These properties apply to the register-to-register form only.
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
             Sched<[WriteMove]>;
  }

  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
           Sched<[WriteLoad]>;

  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(store KRC:$src, addr:$dst)]>,
           Sched<[WriteStore]>;
}
2816
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC):
//   kr - GPR to mask register
//   rk - mask register to GPR
// Neither form carries a selection pattern.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr, RegisterClass KRC,
                               RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
             Sched<[WriteMove]>;

    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
             Sched<[WriteMove]>;
  }
}
2829
// kmovb: 8-bit masks, HasDQI. The GPR forms use GR32.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// kmovw: 16-bit masks, HasAVX512. The GPR forms use GR32.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// kmovd / kmovq: 32- and 64-bit masks, HasBWI. The mask/memory forms and the
// GPR forms are instantiated separately because they take different prefix
// classes.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2850
// GR from/to mask register

// i16 <-> v16i1, going through a 32-bit register.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS
              (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
              VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
                          sub_16bit)>;

// i8 <-> v8i1, going through a 32-bit register.
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS
              (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)),
              VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
                          sub_8bit)>;

// v16i1 extended to i32/i64: zext selects KMOVWrk, anyext is a plain copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF),
                         (COPY_TO_REGCLASS VK16:$src, GR32),
                         sub_32bit)>;

// v8i1 extended to i32/i64: zext selects KMOVBrk and so requires HasDQI;
// anyext is a plain copy with no extra requirement.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>,
      Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>,
      Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF),
                         (COPY_TO_REGCLASS VK8:$src, GR32),
                         sub_32bit)>;

// i32 <-> v32i1 and i64 <-> v64i1 are direct register-class copies.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2888
2889// Load/store kreg
2890let Predicates = [HasDQI] in {
  // A VK1 store is emitted as a KMOVB store after copying the value to VK8.
2891  def : Pat<(store VK1:$src, addr:$dst),
2892            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2893
  // Loads of v1i1/v2i1/v4i1 use a KMOVB load, then copy the result into the
  // narrower mask register class.
2894  def : Pat<(v1i1 (load addr:$src)),
2895            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2896  def : Pat<(v2i1 (load addr:$src)),
2897            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2898  def : Pat<(v4i1 (load addr:$src)),
2899            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2900}
2901
2902let Predicates = [HasAVX512] in {
  // An i8 load bitcast to v8i1 is done with MOVZX32rm8 followed by a copy into
  // VK8 (so it does not depend on KMOVB); a 16-bit load bitcast to v16i1 is
  // loaded directly with KMOVWkm.
2903  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2904            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2905  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2906            (KMOVWkm addr:$src)>;
2907}
2908
// X86kextract: SDNode for ISD::EXTRACT_VECTOR_ELT whose type profile has one
// result and two operands: the result is i8, operand 1 is a vector with i1
// elements, and operand 2 has pointer type.
2909def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2910                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2911                                              SDTCVecEltisVT<1, i1>,
2912                                              SDTCisPtrTy<2>]>>;
2913
2914let Predicates = [HasAVX512] in {
  // GPR <-> mask copies for one (mask register class, mask value type) pair:
  //   - scalar_to_vector from GR32 is a register-class copy;
  //   - scalar_to_vector from GR8 first inserts the GR8 into sub_8bit of an
  //     undefined i32, then copies to the mask class;
  //   - extracting element 0 as i8 copies the mask to GR32 and takes sub_8bit;
  //   - anyext of that extract to i32 is just the copy to GR32.
2915  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2916    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2917              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2918
2919    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2920              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2921
2922    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2923              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2924
2925    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2926              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2927  }
2928
  // Instantiate the copies for every mask width from 1 to 64 bits.
2929  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2930  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2931  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2932  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2933  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2934  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2935  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2936
  // Inserting a v1i1 built from a GR8 into an all-zeros v16i1 at index 0:
  // the widened GR8 is ANDed with 1 (AND32ri8) and the result is moved into a
  // mask register with KMOVWkr.
2937  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2938                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2939            (COPY_TO_REGCLASS
2940             (KMOVWkr (AND32ri8
2941                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2942                       (i32 1))), VK16)>;
2943}
2944
2945// Mask unary operation
2946// - KNOT
// avx512_mask_unop defines the register-register form `rr` of a unary mask
// instruction.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic (the caller appends the width suffix).
//   KRC       - mask register class of both the source and the destination.
//   OpNode    - operator matched by the selection pattern.
//   sched     - scheduling class placed in Sched<[...]>.
//   prd       - predicate guarding the instruction.
2947multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2948                            RegisterClass KRC, SDPatternOperator OpNode,
2949                            X86FoldableSchedWrite sched, Predicate prd> {
2950  let Predicates = [prd] in
2951    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2952               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2953               [(set KRC:$dst, (OpNode KRC:$src))]>,
2954               Sched<[sched]>;
2955}
2956
// Instantiates avx512_mask_unop for the four mask widths, appending b/w/d/q to
// the mnemonic. B requires HasDQI, W requires HasAVX512, D and Q require
// HasBWI. B and D use the PD prefix class, W and Q use PS; D and Q also set
// VEX_W.
2957multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2958                                SDPatternOperator OpNode,
2959                                X86FoldableSchedWrite sched> {
2960  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2961                            sched, HasDQI>, VEX, PD;
2962  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2963                            sched, HasAVX512>, VEX, PS;
2964  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2965                            sched, HasBWI>, VEX, PD, VEX_W;
2966  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2967                            sched, HasBWI>, VEX, PS, VEX_W;
2968}
2969
2970// TODO - do we need a X86SchedWriteWidths::KMASK type?
2971defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2972
2973// Without DQI there is no KNOTB (the B form is predicated on HasDQI), so an
// 8-bit mask is promoted to 16-bit and KNOTW is used instead.
2974let Predicates = [HasAVX512, NoDQI] in
2975def : Pat<(vnot VK8:$src),
2976          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2977
// 4- and 2-bit masks are likewise promoted to 16-bit and use KNOTW.
// NOTE: the `let` above has no braces, so it applies only to the VK8 pattern;
// the two patterns below carry no explicit predicate.
2978def : Pat<(vnot VK4:$src),
2979          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2980def : Pat<(vnot VK2:$src),
2981          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2982
2983// Mask binary operation
2984// - KAND, KANDN, KOR, KXNOR, KXOR
// avx512_mask_binop defines the register-register form `rr` of a binary mask
// instruction.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic (the caller appends the width suffix).
//   KRC          - mask register class of both sources and the destination.
//   OpNode       - operator matched by the selection pattern.
//   sched        - scheduling class placed in Sched<[...]>.
//   prd          - predicate guarding the instruction.
//   IsCommutable - value assigned to the instruction's isCommutable field.
2985multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2986                           RegisterClass KRC, SDPatternOperator OpNode,
2987                           X86FoldableSchedWrite sched, Predicate prd,
2988                           bit IsCommutable> {
2989  let Predicates = [prd], isCommutable = IsCommutable in
2990    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2991               !strconcat(OpcodeStr,
2992                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2993               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2994               Sched<[sched]>;
2995}
2996
// Instantiates avx512_mask_binop for the four mask widths, appending b/w/d/q
// to the mnemonic. B requires HasDQI, D and Q require HasBWI, and W uses prdW
// (HasAVX512 unless the caller overrides it). All forms are VEX_4V, VEX_L;
// B and D use the PD prefix class, W and Q use PS; D and Q also set VEX_W.
2997multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2998                                 SDPatternOperator OpNode,
2999                                 X86FoldableSchedWrite sched, bit IsCommutable,
3000                                 Predicate prdW = HasAVX512> {
3001  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3002                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3003  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3004                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3005  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3006                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3007  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3008                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3009}
3010
// Pattern fragments for the compound mask operations:
//   andn(i0, i1) = (not i0) & i1
//   xnor(i0, i1) = not (i0 ^ i1)
3011def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
3012def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
3013// These nodes use 'vnot' instead of 'not' to support vectors.
3014def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3015def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3016
3017// TODO - do we need a X86SchedWriteWidths::KMASK type?
// Binary mask instructions for all four widths. The fifth argument is
// IsCommutable: every operation here is commutable except KANDN. KADD passes
// HasDQI as the predicate of its W form instead of the default HasAVX512.
3018defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3019defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3020defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3021defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3022defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3023defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3024
// avx512_binop_pat lowers a binary mask operation on masks narrower than
// 16 bits by promoting both operands to VK16, applying the 16-bit instruction
// and copying the result back to the register class of the original type.
//   VOpNode - operator matched for the v8i1, v4i1 and v2i1 cases.
//   OpNode  - operator matched for the VK1 case.
//   Inst    - 16-bit mask instruction that performs the operation
//             (e.g. KANDWrr).
3025multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3026                            Instruction Inst> {
3027  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3028  // for the DQI set, this type is legal and KxxxB instruction is used
3029  let Predicates = [NoDQI] in
3030  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3031            (COPY_TO_REGCLASS
3032              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3033                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3034
3035  // All types smaller than 8 bits require conversion anyway
  // The result is copied back to the class matching the operand width
  // (VK1/VK2/VK4), the same way the VK8 case above copies back to VK8.
3036  def : Pat<(OpNode VK1:$src1, VK1:$src2),
3037        (COPY_TO_REGCLASS (Inst
3038                           (COPY_TO_REGCLASS VK1:$src1, VK16),
3039                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3040  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3041        (COPY_TO_REGCLASS (Inst
3042                           (COPY_TO_REGCLASS VK2:$src1, VK16),
3043                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3044  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3045        (COPY_TO_REGCLASS (Inst
3046                           (COPY_TO_REGCLASS VK4:$src1, VK16),
3047                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3048}
3049
// Promotion patterns for each logic operation: the first argument is the
// operator used for the v2i1/v4i1/v8i1 cases, the second the operator used for
// VK1, and the third the 16-bit instruction that implements it.
3050defm : avx512_binop_pat<and,   and,  KANDWrr>;
3051defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3052defm : avx512_binop_pat<or,    or,   KORWrr>;
3053defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3054defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
3055
3056// Mask unpacking
// avx512_mask_unpck defines the `rr` form of kunpck<Suffix> (opcode 0x4b),
// which takes two masks of Src's register class and produces a mask of Dst's
// register class, plus a pattern that selects it for concat_vectors.
//   Suffix - mnemonic suffix appended to "kunpck".
//   Dst    - info for the result mask type.
//   Src    - info for the two source mask types.
//   sched  - scheduling class placed in Sched<[...]>.
//   prd    - predicate guarding both the instruction and the pattern.
// Note that the pattern swaps the operands: (concat_vectors $src1, $src2) is
// emitted as rr $src2, $src1.
3057multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3058                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3059                             Predicate prd> {
3060  let Predicates = [prd] in {
3061    let hasSideEffects = 0 in
3062    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3063               (ins Src.KRC:$src1, Src.KRC:$src2),
3064               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3065               VEX_4V, VEX_L, Sched<[sched]>;
3066
3067    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3068              (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
3069  }
3070}
3071
// Unpack instructions: 8+8 -> 16 bits (HasAVX512), 16+16 -> 32 bits and
// 32+32 -> 64 bits (both HasBWI).
3072defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3073defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3074defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3075
3076// Mask bit testing
// avx512_mask_testop defines the `rr` form of a mask test instruction: it has
// no register outputs, takes two masks of class KRC, and its pattern sets
// EFLAGS from (OpNode $src1, $src2). EFLAGS is listed in Defs.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic (the caller appends the width suffix).
//   KRC       - mask register class of both sources.
//   OpNode    - node matched by the selection pattern.
//   sched     - scheduling class placed in Sched<[...]>.
//   prd       - predicate guarding the instruction.
3077multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3078                              SDNode OpNode, X86FoldableSchedWrite sched,
3079                              Predicate prd> {
3080  let Predicates = [prd], Defs = [EFLAGS] in
3081    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3082               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3083               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3084               Sched<[sched]>;
3085}
3086
// Instantiates avx512_mask_testop for all four mask widths (despite the `_w`
// in the name), appending b/w/q/d to the mnemonic. B requires HasDQI, Q and D
// require HasBWI, and W uses prdW (HasAVX512 unless the caller overrides it).
// B and D use the PD prefix class, W and Q use PS; Q and D also set VEX_W.
3087multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3088                                X86FoldableSchedWrite sched,
3089                                Predicate prdW = HasAVX512> {
3090  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3091                                                                VEX, PD;
3092  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3093                                                                VEX, PS;
3094  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3095                                                                VEX, PS, VEX_W;
3096  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3097                                                                VEX, PD, VEX_W;
3098}
3099
3100// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST keeps the default HasAVX512 predicate for its W form; KTEST passes
// HasDQI for its W form.
3101defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3102defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3103
3104// Mask shift
// avx512_mask_shiftop defines the register-immediate form `ri` of a mask shift:
// one mask source of class KRC, a u8imm shift amount, and a mask result of the
// same class. The pattern matches OpNode with an i8 target-immediate operand.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic (the caller appends the width suffix).
//   KRC       - mask register class of the source and the destination.
//   OpNode    - node matched by the selection pattern.
//   sched     - scheduling class placed in Sched<[...]>.
3105multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3106                               SDNode OpNode, X86FoldableSchedWrite sched> {
3107  let Predicates = [HasAVX512] in
3108    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3109                 !strconcat(OpcodeStr,
3110                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3111                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3112                 Sched<[sched]>;
3113}
3114
// Instantiates avx512_mask_shiftop for all four mask widths. opc1 is used for
// the W and B forms and opc2 for the Q and D forms; W and Q additionally set
// VEX_W. B is wrapped in Predicates = [HasDQI] and Q/D in
// Predicates = [HasBWI].
// NOTE(review): avx512_mask_shiftop itself sets Predicates = [HasAVX512] on
// `ri`; confirm that the enclosing lets here take precedence over it.
3115multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3116                                 SDNode OpNode, X86FoldableSchedWrite sched> {
3117  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3118                               sched>, VEX, TAPD, VEX_W;
3119  let Predicates = [HasDQI] in
3120  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3121                               sched>, VEX, TAPD;
3122  let Predicates = [HasBWI] in {
3123  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3124                               sched>, VEX, TAPD, VEX_W;
3125  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3126                               sched>, VEX, TAPD;
3127  }
3128}
3129
// Left and right mask shifts. The first opcode is shared by the W/B forms and
// the second by the Q/D forms.
3130defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3131defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3132
3133// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Each Narrow operand is inserted at Narrow.SubRegIdx into an undefined Wide
// register (so the remaining elements are undefined), the wide compare
// InstStr#"Zrri" is applied with the condition code transformed by
// Frag.OperandTransform, and the resulting mask is copied to Narrow.KRC.
//   Frag    - compare fragment matched by the unmasked pattern.
//   Frag_su - compare fragment matched when the compare is ANDed with a mask.
//   InstStr - instruction name prefix; "Zrri"/"Zrrik" are appended.
//   Narrow  - info for the 128/256-bit source type.
//   Wide    - info for the 512-bit type the compare is performed in.
3134multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3135                                                 string InstStr,
3136                                                 X86VectorVTInfo Narrow,
3137                                                 X86VectorVTInfo Wide> {
3138def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3139                                (Narrow.VT Narrow.RC:$src2), cond)),
3140          (COPY_TO_REGCLASS
3141           (!cast<Instruction>(InstStr#"Zrri")
3142            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3143            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3144            (Frag.OperandTransform $cc)), Narrow.KRC)>;
3145
// Masked form: (and $mask, compare) is selected to the write-masked wide
// compare "Zrrik", with the mask copied to Wide.KRC.
3146def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3147                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3148                                                    (Narrow.VT Narrow.RC:$src2),
3149                                                    cond)))),
3150          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3151           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3152           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3153           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3154           (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3155}
3156
// Broadcast-load variants of the 128/256-bit integer compare lowering: one
// operand is a Narrow register widened into an undefined Wide register, the
// other is a broadcast load folded as addr:$src2 into InstStr#"Zrmib" (or
// the write-masked "Zrmibk").
//   Frag / Frag_su         - compare fragments with the load as second operand
//                            (unmasked / ANDed with a mask).
//   CommFrag / CommFrag_su - fragments matched when the load is the first
//                            operand; their OperandTransform is applied to the
//                            condition code.
//   InstStr                - instruction name prefix.
//   Narrow / Wide          - info for the source type and the 512-bit type.
3157multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3158                                                     PatFrag CommFrag, PatFrag CommFrag_su,
3159                                                     string InstStr,
3160                                                     X86VectorVTInfo Narrow,
3161                                                     X86VectorVTInfo Wide> {
3162// Broadcast load.
3163def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3164                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3165          (COPY_TO_REGCLASS
3166           (!cast<Instruction>(InstStr#"Zrmib")
3167            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3168            addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
3169
3170def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3171                           (Narrow.KVT
3172                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3173                                         (Narrow.BroadcastLdFrag addr:$src2),
3174                                         cond)))),
3175          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3176           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3177           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3178           addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3179
3180// Commuted with broadcast load.
3181def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3182                                    (Narrow.VT Narrow.RC:$src1),
3183                                    cond)),
3184          (COPY_TO_REGCLASS
3185           (!cast<Instruction>(InstStr#"Zrmib")
3186            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3187            addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
3188
3189def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3190                           (Narrow.KVT
3191                            (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3192                                             (Narrow.VT Narrow.RC:$src1),
3193                                             cond)))),
3194          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3195           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3196           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3197           addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
3198}
3199
3200// Same as above, but for fp types which don't use PatFrags.
// The unmasked patterns match X86any_cmpm and the masked (ANDed) patterns
// match X86cmpm_su; the condition code is passed through as timm:$cc, except
// in the commuted broadcast patterns where X86cmpm_imm_commute is applied.
// Instruction suffixes used: "Zrri"/"Zrrik" for register operands and
// "Zrmbi"/"Zrmbik" for a broadcast-load operand.
//   InstStr - instruction name prefix.
//   Narrow  - info for the 128/256-bit source type.
//   Wide    - info for the 512-bit type the compare is performed in.
3201multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3202                                                X86VectorVTInfo Narrow,
3203                                                X86VectorVTInfo Wide> {
3204def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
3205                                   (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3206          (COPY_TO_REGCLASS
3207           (!cast<Instruction>(InstStr#"Zrri")
3208            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3209            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3210            timm:$cc), Narrow.KRC)>;
3211
3212def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3213                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3214                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3215          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3216           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3217           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3218           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3219           timm:$cc), Narrow.KRC)>;
3220
3221// Broadcast load.
3222def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
3223                                   (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3224          (COPY_TO_REGCLASS
3225           (!cast<Instruction>(InstStr#"Zrmbi")
3226            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3227            addr:$src2, timm:$cc), Narrow.KRC)>;
3228
3229def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3230                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3231                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3232          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3233           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3234           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3235           addr:$src2, timm:$cc), Narrow.KRC)>;
3236
3237// Commuted with broadcast load.
3238def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3239                                   (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3240          (COPY_TO_REGCLASS
3241           (!cast<Instruction>(InstStr#"Zrmbi")
3242            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3243            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3244
3245def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3246                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3247                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3248          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3249           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3250           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3251           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3252}
3253
// Without VLX, 128/256-bit dword/qword integer compares (signed X86pcmpm and
// unsigned X86pcmpum) and ps/pd fp compares are lowered through the 512-bit
// instructions, for both register and broadcast-load operands.
3254let Predicates = [HasAVX512, NoVLX] in {
3255  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3256  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3257
3258  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3259  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3260
3261  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3262  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3263
3264  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3265  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3266
  // Broadcast-load forms of the same integer compares.
3267  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3268  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3269
3270  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3271  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3272
3273  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3274  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3275
3276  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3277  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3278
  // Floating-point compares.
3279  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3280  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3281  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3282  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3283}
3284
// With BWI but without VLX, 128/256-bit byte and word integer compares are
// lowered through the 512-bit VPCMP{B,UB,W,UW} instructions. Only the
// register-register lowering is instantiated here (no broadcast-load forms).
3285let Predicates = [HasBWI, NoVLX] in {
3286  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3287  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3288
3289  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3290  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3291
3292  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3293  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3294
3295  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3296  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3297}
3298
3299// Mask setting all 0s or 1s
// avx512_mask_setop defines a pseudo instruction, named exactly NAME, with no
// inputs, an empty asm string and a pattern that sets a KRC register to the
// constant (VT Val). It is marked rematerializable, as cheap as a move and
// isPseudo, uses SchedRW = [WriteZero], and is guarded by HasAVX512.
//   KRC - mask register class of the result.
//   VT  - mask value type of the constant.
//   Val - pattern fragment for the constant value.
3300multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3301  let Predicates = [HasAVX512] in
3302    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3303        SchedRW = [WriteZero] in
3304      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3305                     [(set KRC:$dst, (VT Val))]>;
3306}
3307
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types
// (W, D, Q). There is no 8-bit form here.
3308multiclass avx512_mask_setop_w<PatFrag Val> {
3309  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3310  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3311  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3312}
3313
// KSET0{W,D,Q} materialize an all-zeros mask, KSET1{W,D,Q} an all-ones mask.
3314defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3315defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3316
3317// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros and all-ones constants of the 8-, 4-, 2- and 1-bit mask types
// reuse the 16-bit pseudos KSET0W/KSET1W and copy the result into the narrower
// register class.
3318let Predicates = [HasAVX512] in {
3319  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3320  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3321  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3322  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3323  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3324  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3325  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3326  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3327}
3328
3329// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are plain register-class copies: extracting the low subVT
// from a VT mask copies RC -> subRC, and inserting a subVT mask at index 0 of
// an undef VT copies subRC -> RC.
//   subRC / subVT - register class and value type of the narrower mask.
//   RC / VT       - register class and value type of the wider mask.
3330multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3331                                             RegisterClass RC, ValueType VT> {
3332  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3333            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3334
3335  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3336            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3337}
// Instantiate the index-0 insert/extract copies for every (narrow, wide) pair
// of mask widths from 1 to 64 bits, grouped by the narrow width.
3338defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3339defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3340defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3341defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3342defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3343defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3344
3345defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3346defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3347defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3348defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3349defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3350
3351defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3352defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3353defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3354defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3355
3356defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3357defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3358defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3359
3360defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3361defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3362
3363defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3364
3365//===----------------------------------------------------------------------===//
3366// AVX-512 - Aligned and unaligned load and store
3367//
3368
// avx512_load defines the load/move forms of one vector move instruction for a
// single vector type, plus the masked-load selection patterns:
//   rr, rrk, rrkz - register forms (plain, merge-masked, zero-masked);
//   rm, rmk, rmkz - memory forms (plain, merge-masked, zero-masked).
// Parameters:
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   Name         - base name used to look up the rmk/rmkz instructions in the
//                  masked-load patterns (Name # _.ZSuffix # "rmk"/"rmkz").
//   _            - vector type info (register class, VT, mask class, ...).
//   ld_frag      - load fragment matched by the rm/rmk/rmkz patterns.
//   mload        - masked-load fragment matched by the trailing patterns.
//   Sched        - scheduling info; .RR is used for register forms and .RM for
//                  memory forms.
//   EVEX2VEXOvrd - prefix passed to EVEX2VEXOverride with "rr"/"rm" appended.
//   NoRMPattern  - when set, the rm form gets an empty pattern list.
//   SelectOprr   - select operator used by the rrk/rrkz patterns only; the
//                  rmk/rmkz patterns always use vselect.
3369multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3370                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3371                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3372                       bit NoRMPattern = 0,
3373                       SDPatternOperator SelectOprr = vselect> {
3374  let hasSideEffects = 0 in {
  // Plain register move; it has no selection pattern.
3375  let isMoveReg = 1 in
3376  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3377                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3378                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3379                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked register move: select between $src and all-zeros.
3380  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3381                      (ins _.KRCWM:$mask,  _.RC:$src),
3382                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3383                       "${dst} {${mask}} {z}, $src}"),
3384                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3385                                           (_.VT _.RC:$src),
3386                                           _.ImmAllZerosV)))], _.ExeDomain>,
3387                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3388
  // Plain load.
3389  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3390  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3391                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3392                    !if(NoRMPattern, [],
3393                        [(set _.RC:$dst,
3394                          (_.VT (ld_frag addr:$src)))]),
3395                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3396                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3397
  // Merge-masked forms: $src0 is tied to $dst and supplies the elements that
  // the mask does not select.
3398  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3399    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3400                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3401                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3402                      "${dst} {${mask}}, $src1}"),
3403                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3404                                          (_.VT _.RC:$src1),
3405                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3406                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3407    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3408                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3409                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3410                      "${dst} {${mask}}, $src1}"),
3411                     [(set _.RC:$dst, (_.VT
3412                         (vselect _.KRCWM:$mask,
3413                          (_.VT (ld_frag addr:$src1)),
3414                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3415                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3416  }
  // Zero-masked load.
3417  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3418                  (ins _.KRCWM:$mask, _.MemOp:$src),
3419                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3420                                "${dst} {${mask}} {z}, $src}",
3421                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3422                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3423                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3424  }
  // Masked-load patterns: an undef or all-zeros pass-through selects the
  // zero-masked load (rmkz); a register pass-through selects the merge-masked
  // load (rmk).
3425  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3426            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3427
3428  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3429            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3430
3431  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3432            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3433             _.KRCWM:$mask, addr:$ptr)>;
3434}
3435
// Aligned-load wrapper: instantiates avx512_load once per vector width
// (Z, Z256, Z128) using that width's AlignedLdFrag together with the
// masked_load_aligned fragment.
//   opc, OpcodeStr - opcode byte and mnemonic, forwarded unchanged.
//   _              - bundle supplying info512 / info256 / info128.
//   prd            - predicate applied to every width; Z256 and Z128
//                    additionally require HasVLX.
//   Sched          - per-width scheduling info (.ZMM / .YMM / .XMM).
//   EVEX2VEXOvrd   - override name: passed as-is for Z128, with "Y" appended
//                    for Z256, and replaced by "" for Z.
//   NoRMPattern    - forwarded; when set, avx512_load gives its unmasked
//                    memory-load def an empty pattern list.
// avx512_load's trailing select-operator argument is not passed here, so its
// default (declared outside this chunk) applies.
3436multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3437                                 AVX512VLVectorVTInfo _, Predicate prd,
3438                                 X86SchedWriteMoveLSWidths Sched,
3439                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3440  let Predicates = [prd] in
3441  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3442                       _.info512.AlignedLdFrag, masked_load_aligned,
3443                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3444
3445  let Predicates = [prd, HasVLX] in {
3446  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3447                          _.info256.AlignedLdFrag, masked_load_aligned,
3448                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3449  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3450                          _.info128.AlignedLdFrag, masked_load_aligned,
3451                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3452  }
3453}
3454
// Unaligned-load wrapper: same shape as avx512_alignedload_vl, but uses each
// width's LdFrag and the masked_load fragment, and additionally forwards
// SelectOprr to avx512_load.
//   SelectOprr - operator used by avx512_load in its register-to-register
//                masked-move pattern; defaults to vselect. Callers in this
//                file pass null_frag for some instantiations.
// Z is guarded by [prd]; Z256 and Z128 also require HasVLX. The 512-bit form
// passes "" as the EVEX2VEX override and the 256-bit form appends "Y".
3455multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3456                          AVX512VLVectorVTInfo _, Predicate prd,
3457                          X86SchedWriteMoveLSWidths Sched,
3458                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
3459                          SDPatternOperator SelectOprr = vselect> {
3460  let Predicates = [prd] in
3461  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3462                       masked_load, Sched.ZMM, "",
3463                       NoRMPattern, SelectOprr>, EVEX_V512;
3464
3465  let Predicates = [prd, HasVLX] in {
3466  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3467                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3468                         NoRMPattern, SelectOprr>, EVEX_V256;
3469  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3470                         masked_load, Sched.XMM, EVEX2VEXOvrd,
3471                         NoRMPattern, SelectOprr>, EVEX_V128;
3472  }
3473}
3474
// Store-direction vector move for one vector type. Defines:
//   rr_REV / rrk_REV / rrkz_REV - register-to-register forms using the
//       MRMDestReg encoding (unmasked, merge-masked, zero-masked). They carry
//       no patterns, are isCodeGenOnly + ForceDisassemble, and each names the
//       matching non-_REV instruction through FoldGenData.
//   mr   - unmasked store; its pattern uses st_frag unless NoMRPattern is set.
//   mrk  - masked store; no inline pattern, marked NotMemoryFoldable.
// It also adds a pattern mapping `mstore` to mrk and three ".s"-suffixed
// assembler aliases that resolve to the _REV forms.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   BaseName       - prefix used with _.ZSuffix to look up sibling
//                    instructions by name.
//   _              - vector type info (register class, mask class, mem operand).
//   st_frag        - store fragment for mr's pattern.
//   mstore         - masked-store fragment for the mrk pattern.
//   Sched          - scheduling info (.RR for register forms, .MR for stores).
//   EVEX2VEXOvrd   - prefix for the EVEX2VEXOverride names on rr_REV and mr.
//   NoMRPattern    - when set, mr gets an empty pattern list.
3475multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3476                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3477                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3478                        bit NoMRPattern = 0> {
3479  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // isMoveReg has no braces, so it applies to rr_REV only.
3480  let isMoveReg = 1 in
3481  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3482                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3483                         [], _.ExeDomain>, EVEX,
3484                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3485                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3486  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3487                         (ins _.KRCWM:$mask, _.RC:$src),
3488                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3489                         "${dst} {${mask}}, $src}",
3490                         [], _.ExeDomain>,  EVEX, EVEX_K,
3491                         FoldGenData<BaseName#_.ZSuffix#rrk>,
3492                         Sched<[Sched.RR]>;
3493  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3494                          (ins _.KRCWM:$mask, _.RC:$src),
3495                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3496                          "${dst} {${mask}} {z}, $src}",
3497                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3498                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
3499                          Sched<[Sched.RR]>;
3500  }
3501
  // This `let` has no braces either: the explicit flags cover mr only, not mrk.
3502  let hasSideEffects = 0, mayStore = 1 in
3503  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3504                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3505                    !if(NoMRPattern, [],
3506                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3507                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3508                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3509  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3510                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3511              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3512               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3513               NotMemoryFoldable;
3514
  // Masked store selects mrk; operands are reordered to (ptr, mask, value).
3515  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3516           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3517                                                        _.KRCWM:$mask, _.RC:$src)>;
3518
  // "<mnemonic>.s" spellings map to the _REV encodings. The trailing 0 is
  // presumably the alias emit priority (parse-only) - TODO confirm.
3519  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3520                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3521                   _.RC:$dst, _.RC:$src), 0>;
3522  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3523                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3524                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3525  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3526                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3527                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3528}
3529
// Unaligned-store wrapper: instantiates avx512_store for Z, Z256 and Z128
// using the `store` and `masked_store` fragments.
//   prd          - predicate for every width; Z256/Z128 also require HasVLX.
//   Sched        - per-width scheduling info (.ZMM / .YMM / .XMM).
//   EVEX2VEXOvrd - passed as-is for Z128, with "Y" appended for Z256, and
//                  replaced by "" for Z.
//   NoMRPattern  - forwarded to avx512_store (suppresses the mr pattern).
3530multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3531                            AVX512VLVectorVTInfo _, Predicate prd,
3532                            X86SchedWriteMoveLSWidths Sched,
3533                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3534  let Predicates = [prd] in
3535  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3536                        masked_store, Sched.ZMM, "",
3537                        NoMRPattern>, EVEX_V512;
3538  let Predicates = [prd, HasVLX] in {
3539    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3540                             masked_store, Sched.YMM,
3541                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3542    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3543                             masked_store, Sched.XMM, EVEX2VEXOvrd,
3544                             NoMRPattern>, EVEX_V128;
3545  }
3546}
3547
// Aligned-store wrapper: identical in shape to avx512_store_vl, but uses the
// `alignedstore` and `masked_store_aligned` fragments.
//   prd          - predicate for every width; Z256/Z128 also require HasVLX.
//   Sched        - per-width scheduling info (.ZMM / .YMM / .XMM).
//   EVEX2VEXOvrd - passed as-is for Z128, with "Y" appended for Z256, and
//                  replaced by "" for Z.
//   NoMRPattern  - forwarded to avx512_store (suppresses the mr pattern).
3548multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3549                                  AVX512VLVectorVTInfo _, Predicate prd,
3550                                  X86SchedWriteMoveLSWidths Sched,
3551                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3552  let Predicates = [prd] in
3553  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3554                        masked_store_aligned, Sched.ZMM, "",
3555                        NoMRPattern>, EVEX_V512;
3556
3557  let Predicates = [prd, HasVLX] in {
3558    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3559                             masked_store_aligned, Sched.YMM,
3560                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3561    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3562                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3563                             NoMRPattern>, EVEX_V128;
3564  }
3565}
3566
// Vector move instantiations. Each defm combines one load multiclass and one
// store multiclass under a shared name, so a single defm yields both
// directions for the Z / Z256 / Z128 widths.
//
// Floating-point aligned moves: load opcode 0x28, store opcode 0x29.
3567defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3568                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3569               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3570                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3571               PS, EVEX_CD8<32, CD8VF>;
3572
3573defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3574                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3575               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3576                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3577               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3578
// Floating-point unaligned moves: load opcode 0x10, store opcode 0x11.
// null_frag is passed as SelectOprr, replacing vselect in the
// register-to-register masked pattern; presumably this leaves register
// selects to the aligned forms - TODO confirm.
3579defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3580                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3581               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3582                               SchedWriteFMoveLS, "VMOVUPS">,
3583                               PS, EVEX_CD8<32, CD8VF>;
3584
3585defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3586                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3587               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3588                               SchedWriteFMoveLS, "VMOVUPD">,
3589               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3590
// Integer moves: load opcode 0x6F, store opcode 0x7F. The 8/16/32-bit element
// variants pass 1 for NoRMPattern/NoMRPattern, so their plain load/store defs
// carry no patterns; explicit patterns later in this file route plain integer
// loads and stores of those element types to the 64-bit-element forms.
3591defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3592                                       HasAVX512, SchedWriteVecMoveLS,
3593                                       "VMOVDQA", 1>,
3594                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3595                                        HasAVX512, SchedWriteVecMoveLS,
3596                                        "VMOVDQA", 1>,
3597                 PD, EVEX_CD8<32, CD8VF>;
3598
3599defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3600                                       HasAVX512, SchedWriteVecMoveLS,
3601                                       "VMOVDQA">,
3602                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3603                                        HasAVX512, SchedWriteVecMoveLS,
3604                                        "VMOVDQA">,
3605                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3606
// Byte and word element moves are predicated on HasBWI rather than HasAVX512.
3607defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3608                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
3609                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3610                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3611                XD, EVEX_CD8<8, CD8VF>;
3612
3613defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3614                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3615                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3616                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3617                 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3618
3619defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3620                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3621                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3622                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3623                 XS, EVEX_CD8<32, CD8VF>;
3624
3625defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3626                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3627                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3628                                 SchedWriteVecMoveLS, "VMOVDQU">,
3629                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3630
3631// Special instructions to help with spilling when we don't have VLX. We need
3632// to load or store from a ZMM register instead. These are converted in
3633// expandPostRAPseudos.
// All eight are pseudos (opcode 0, Pseudo format) with an empty assembly
// string and no selection patterns. The load forms are additionally marked
// rematerializable and foldable-as-load.
3634let isReMaterializable = 1, canFoldAsLoad = 1,
3635    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3636def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3637                            "", []>, Sched<[WriteFLoadX]>;
3638def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3639                            "", []>, Sched<[WriteFLoadY]>;
3640def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3641                            "", []>, Sched<[WriteFLoadX]>;
3642def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3643                            "", []>, Sched<[WriteFLoadY]>;
3644}
3645
// Store counterparts of the pseudos above.
3646let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3647def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3648                            "", []>, Sched<[WriteFStoreX]>;
3649def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3650                            "", []>, Sched<[WriteFStoreY]>;
3651def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3652                            "", []>, Sched<[WriteFStoreX]>;
3653def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3654                            "", []>, Sched<[WriteFStoreY]>;
3655}
3656
// 512-bit selects whose *true* operand is all-zeros. They are emitted as a
// zero-masked register move of $src under the inverted mask. For v8i64 the
// mask is copied to VK16, inverted with KNOTWrr, and copied back to VK8.
// NOTE(review): in some of these patterns $mask is bound with one register
// class in the source dag and a different one in the result (VK8WM vs VK8
// here, VK16 vs VK16WM in the last pattern) - confirm this is intentional.
3657def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3658                          (v8i64 VR512:$src))),
3659   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3660                                              VK8), VR512:$src)>;
3661
3662def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3663                           (v16i32 VR512:$src))),
3664                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3665
3666// These patterns exist to prevent the above patterns from introducing a second
3667// mask inversion when one already exists.
// The source dag matches a mask that is already XOR-ed with all-ones, so the
// un-inverted $mask is used directly as the zeroing mask.
3668def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3669                          (v8i64 immAllZerosV),
3670                          (v8i64 VR512:$src))),
3671                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3672def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3673                           (v16i32 immAllZerosV),
3674                           (v16i32 VR512:$src))),
3675                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3676
// Lowers a vselect on a Narrow vector type by running the masked move of a
// Wide vector type and extracting the Narrow subregister from the result.
//   InstrStr - name prefix of the wide instruction; "rrk" / "rrkz" are
//              appended to pick the merge-masked / zero-masked form.
//   Narrow   - type info of the vselect being matched.
//   Wide     - type info of the instruction actually emitted.
// Narrow operands are placed in a wide register with INSERT_SUBREG into
// IMPLICIT_DEF, and the mask is moved to Wide.KRCWM with COPY_TO_REGCLASS.
3677multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3678                              X86VectorVTInfo Wide> {
 // Merge form: $src0 (the false operand) is passed as the first instruction
 // operand and $src1 as the moved source.
3679 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3680                               Narrow.RC:$src1, Narrow.RC:$src0)),
3681           (EXTRACT_SUBREG
3682            (Wide.VT
3683             (!cast<Instruction>(InstrStr#"rrk")
3684              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3685              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3686              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3687            Narrow.SubRegIdx)>;
3688
 // Zeroing form: the false operand is all-zeros, so only the mask and $src1
 // are passed to the "rrkz" instruction.
3689 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3690                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3691           (EXTRACT_SUBREG
3692            (Wide.VT
3693             (!cast<Instruction>(InstrStr#"rrkz")
3694              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3695              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3696            Narrow.SubRegIdx)>;
3697}
3698
3699// Patterns for handling masked selects of narrower-than-512-bit vectors when
3700// VLX isn't available. Use a 512-bit operation and extract.
// Each narrow type info (first argument) is paired with the wide type info of
// the same element type (second argument).
3701let Predicates = [HasAVX512, NoVLX] in {
3702  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3703  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3704  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3705  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3706
3707  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3708  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3709  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3710  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3711}
3712
// Byte and word element selects use the VMOVDQU8Z / VMOVDQU16Z forms and
// require BWI in addition to the absence of VLX.
3713let Predicates = [HasBWI, NoVLX] in {
3714  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3715  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3716
3717  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3718  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3719}
3720
// Plain (unmasked) 512-bit integer loads and stores for the v16i32, v32i16
// and v64i8 types. All of them select the 64-bit-element instructions:
// VMOVDQA64Z* for aligned accesses and VMOVDQU64Z* otherwise.
3721let Predicates = [HasAVX512] in {
3722  // 512-bit load.
3723  def : Pat<(alignedloadv16i32 addr:$src),
3724            (VMOVDQA64Zrm addr:$src)>;
3725  def : Pat<(alignedloadv32i16 addr:$src),
3726            (VMOVDQA64Zrm addr:$src)>;
3727  def : Pat<(alignedloadv64i8 addr:$src),
3728            (VMOVDQA64Zrm addr:$src)>;
3729  def : Pat<(loadv16i32 addr:$src),
3730            (VMOVDQU64Zrm addr:$src)>;
3731  def : Pat<(loadv32i16 addr:$src),
3732            (VMOVDQU64Zrm addr:$src)>;
3733  def : Pat<(loadv64i8 addr:$src),
3734            (VMOVDQU64Zrm addr:$src)>;
3735
3736  // 512-bit store.
3737  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3738            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3739  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3740            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3741  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3742            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3743  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3744            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3745  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3746            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3747  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3748            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3749}
3750
// Plain (unmasked) 128-bit and 256-bit integer loads and stores for the
// 32-, 16- and 8-bit element types, available only with VLX. As with the
// 512-bit patterns, everything selects the 64-bit-element instructions
// (VMOVDQA64Z128/Z256 when aligned, VMOVDQU64Z128/Z256 otherwise).
3751let Predicates = [HasVLX] in {
3752  // 128-bit load.
3753  def : Pat<(alignedloadv4i32 addr:$src),
3754            (VMOVDQA64Z128rm addr:$src)>;
3755  def : Pat<(alignedloadv8i16 addr:$src),
3756            (VMOVDQA64Z128rm addr:$src)>;
3757  def : Pat<(alignedloadv16i8 addr:$src),
3758            (VMOVDQA64Z128rm addr:$src)>;
3759  def : Pat<(loadv4i32 addr:$src),
3760            (VMOVDQU64Z128rm addr:$src)>;
3761  def : Pat<(loadv8i16 addr:$src),
3762            (VMOVDQU64Z128rm addr:$src)>;
3763  def : Pat<(loadv16i8 addr:$src),
3764            (VMOVDQU64Z128rm addr:$src)>;
3765
3766  // 128-bit store.
3767  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3768            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3769  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3770            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3771  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3772            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3773  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3774            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3775  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3776            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3777  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3778            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3779
3780  // 256-bit load.
3781  def : Pat<(alignedloadv8i32 addr:$src),
3782            (VMOVDQA64Z256rm addr:$src)>;
3783  def : Pat<(alignedloadv16i16 addr:$src),
3784            (VMOVDQA64Z256rm addr:$src)>;
3785  def : Pat<(alignedloadv32i8 addr:$src),
3786            (VMOVDQA64Z256rm addr:$src)>;
3787  def : Pat<(loadv8i32 addr:$src),
3788            (VMOVDQU64Z256rm addr:$src)>;
3789  def : Pat<(loadv16i16 addr:$src),
3790            (VMOVDQU64Z256rm addr:$src)>;
3791  def : Pat<(loadv32i8 addr:$src),
3792            (VMOVDQU64Z256rm addr:$src)>;
3793
3794  // 256-bit store.
3795  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3796            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3797  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3798            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3799  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3800            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3801  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3802            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3803  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3804            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3805  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3806            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3807}
3808
3809// Move Int Doubleword to Packed Double Int
3810//
// Despite the heading, this group also holds the quadword ("vmovq") forms and
// the GR64 <-> FR64X bitconvert moves. All use opcode 0x6E except
// VMOVSDto64Zrr, which uses 0x7E with the MRMDestReg form.
3811let ExeDomain = SSEPackedInt in {
// GR32 -> low element of a v4i32 (scalar_to_vector).
3812def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3813                      "vmovd\t{$src, $dst|$dst, $src}",
3814                      [(set VR128X:$dst,
3815                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3816                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// Same, with the i32 loaded from memory.
3817def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3818                      "vmovd\t{$src, $dst|$dst, $src}",
3819                      [(set VR128X:$dst,
3820                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3821                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> low element of a v2i64 (scalar_to_vector).
3822def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3823                      "vmovq\t{$src, $dst|$dst, $src}",
3824                        [(set VR128X:$dst,
3825                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3826                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form of the above: no selection pattern, codegen-only but forced
// into the disassembler tables.
3827let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3828def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3829                      (ins i64mem:$src),
3830                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3831                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitconverts between GR64 and the scalar FR64X class.
3832let isCodeGenOnly = 1 in {
3833def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3834                       "vmovq\t{$src, $dst|$dst, $src}",
3835                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3836                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3837def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3838                         "vmovq\t{$src, $dst|$dst, $src}",
3839                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3840                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3841}
3842} // ExeDomain = SSEPackedInt
3843
3844// Move Int Doubleword to Single Scalar
3845//
// Codegen-only bitconvert from GR32 to the scalar FR32X class, printed as
// "vmovd" (opcode 0x6E).
3846let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3847def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3848                      "vmovd\t{$src, $dst|$dst, $src}",
3849                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3850                      EVEX, Sched<[WriteVecMoveFromGpr]>;
3851} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3852
3853// Move doubleword from xmm register to r/m32
3854//
// Both forms extract element 0 of a v4i32 (opcode 0x7E): rr writes it to a
// GR32, mr stores it to memory.
3855let ExeDomain = SSEPackedInt in {
3856def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3857                       "vmovd\t{$src, $dst|$dst, $src}",
3858                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3859                                        (iPTR 0)))]>,
3860                       EVEX, Sched<[WriteVecMoveToGpr]>;
3861def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3862                       (ins i32mem:$dst, VR128X:$src),
3863                       "vmovd\t{$src, $dst|$dst, $src}",
3864                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3865                                     (iPTR 0))), addr:$dst)]>,
3866                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3867} // ExeDomain = SSEPackedInt
3868
3869// Move quadword from xmm1 register to r/m64
3870//
// Two opcodes are used for "vmovq" here: 0x7E (VMOVPQIto64Z*) and 0xD6
// (VMOVPQI2QIZ*). Three of the four defs use the generic `I` class with an
// explicit PD prefix and Requires<> list.
3871let ExeDomain = SSEPackedInt in {
// Element 0 of a v2i64 -> GR64.
3872def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3873                      "vmovq\t{$src, $dst|$dst, $src}",
3874                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3875                                                   (iPTR 0)))]>,
3876                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3877                      Requires<[HasAVX512]>;
3878
// Memory form of the 0x7E encoding: no pattern, codegen-only, forced into the
// disassembler tables, 64-bit mode only.
// NOTE(review): unlike VMOVPQI2QIZmr below, this def has no EVEX_CD8<...>
// modifier - confirm that is intended.
3879let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3880def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3881                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3882                      EVEX, VEX_W, Sched<[WriteVecStore]>,
3883                      Requires<[HasAVX512, In64BitMode]>;
3884
// Store of element 0 of a v2i64 using the 0xD6 encoding; this is the form
// that carries the selection pattern.
3885def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3886                      (ins i64mem:$dst, VR128X:$src),
3887                      "vmovq\t{$src, $dst|$dst, $src}",
3888                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3889                              addr:$dst)]>,
3890                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3891                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3892
// Register-to-register 0xD6 form: no pattern, codegen-only; reachable from
// assembly through the "vmovq.s" alias below.
3893let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3894def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3895                             (ins VR128X:$src),
3896                             "vmovq\t{$src, $dst|$dst, $src}", []>,
3897                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3898} // ExeDomain = SSEPackedInt
3899
3900def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3901                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3902
// X86vextractstore64 of a v2i64 also selects the 0xD6 store.
3903let Predicates = [HasAVX512] in {
3904  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3905            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3906}
3907
3908// Move Scalar Single to Double Int
3909//
// Codegen-only bitconvert from the scalar FR32X class to GR32, printed as
// "vmovd" (opcode 0x7E, MRMDestReg form).
3910let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3911def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3912                      (ins FR32X:$src),
3913                      "vmovd\t{$src, $dst|$dst, $src}",
3914                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3915                      EVEX, Sched<[WriteVecMoveToGpr]>;
3916} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3917
3918// Move Quadword Int to Packed Quadword Int
3919//
// Loads an i64 from memory into the low element of a v2i64
// (scalar_to_vector), using the XS-prefixed 0x7E encoding.
// NOTE(review): this def uses EVEX_CD8<8, CD8VT8>, whereas the other 64-bit
// scalar memory forms in this section use EVEX_CD8<64, CD8VT1> - presumably
// both describe the same 8-byte access; confirm.
3920let ExeDomain = SSEPackedInt in {
3921def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3922                      (ins i64mem:$src),
3923                      "vmovq\t{$src, $dst|$dst, $src}",
3924                      [(set VR128X:$dst,
3925                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3926                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3927} // ExeDomain = SSEPackedInt
3928
3929// Allow "vmovd" but print "vmovq".
// Both aliases target the 64-bit GPR <-> XMM instructions, whose own assembly
// string is "vmovq".
3930def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3931                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3932def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3933                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3934
3935// Conversions between masks and scalar fp.
// Each bitconvert is emitted as two moves that go through a general-purpose
// register: FR32X <-> GR32 <-> VK32 and FR64X <-> GR64 <-> VK64.
3936def : Pat<(v32i1 (bitconvert FR32X:$src)),
3937          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3938def : Pat<(f32 (bitconvert VK32:$src)),
3939          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3940
3941def : Pat<(v64i1 (bitconvert FR64X:$src)),
3942          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3943def : Pat<(f64 (bitconvert VK64:$src)),
3944          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3945
3946//===----------------------------------------------------------------------===//
3947// AVX-512  MOVSS, MOVSD
3948//===----------------------------------------------------------------------===//
3949
3950multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3951                              X86VectorVTInfo _> {
3952  let Predicates = [HasAVX512, OptForSize] in
3953  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3954             (ins _.RC:$src1, _.RC:$src2),
3955             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3956             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3957             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3958  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3959              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3960              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3961              "$dst {${mask}} {z}, $src1, $src2}"),
3962              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3963                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3964                                      _.ImmAllZerosV)))],
3965              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3966  let Constraints = "$src0 = $dst"  in
3967  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3968             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3969             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3970             "$dst {${mask}}, $src1, $src2}"),
3971             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3972                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3973                                     (_.VT _.RC:$src0))))],
3974             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3975  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3976  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3977             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3978             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3979             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3980  // _alt version uses FR32/FR64 register class.
3981  let isCodeGenOnly = 1 in
3982  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3983                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3984                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3985                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3986  }
3987  let mayLoad = 1, hasSideEffects = 0 in {
3988    let Constraints = "$src0 = $dst" in
3989    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3990               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3991               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3992               "$dst {${mask}}, $src}"),
3993               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3994    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3995               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3996               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3997               "$dst {${mask}} {z}, $src}"),
3998               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3999  }
4000  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4001             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4002             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4003             EVEX, Sched<[WriteFStore]>;
4004  let mayStore = 1, hasSideEffects = 0 in
4005  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4006              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4007              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4008              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
4009              NotMemoryFoldable;
4010}
4011
// Single-precision (XS prefix, 32-bit element) and double-precision (XD
// prefix, VEX_W, 64-bit element) instantiations of avx512_move_scalar.
4012defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4013                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4014
4015defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4016                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4017
4018
// Patterns that fold a mask-controlled scalar select, inserted as the second
// operand of OpNode, into the masked register-register move named InstrStr.
//   InstrStr - instruction name prefix; "rrk" / "rrkz" is pasted onto it.
//   OpNode   - vector node whose second operand carries the selected scalar.
//   ZeroFP   - leaf used as the select's false value in the zeroing pattern.
//   _        - type info supplying VT, EltVT, RC and FRC.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
  // select(mask, src1, src2) -> <InstrStr>rrk.  Both scalars are copied from
  // the scalar class FRC into the vector class RC before being used.
  def : Pat<
    (_.VT (OpNode _.RC:$src0,
                  (_.VT (scalar_to_vector
                          (_.EltVT (X86selects VK1WM:$mask,
                                               (_.EltVT _.FRC:$src1),
                                               (_.EltVT _.FRC:$src2))))))),
    (!cast<Instruction>(InstrStr#rrk)
        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
        VK1WM:$mask,
        (_.VT _.RC:$src0),
        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

  // select(mask, src1, ZeroFP) -> <InstrStr>rrkz (no $src2 operand needed).
  def : Pat<
    (_.VT (OpNode _.RC:$src0,
                  (_.VT (scalar_to_vector
                          (_.EltVT (X86selects VK1WM:$mask,
                                               (_.EltVT _.FRC:$src1),
                                               (_.EltVT ZeroFP))))))),
    (!cast<Instruction>(InstrStr#rrkz)
        VK1WM:$mask,
        (_.VT _.RC:$src0),
        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4043
// Selects <InstrStr>mrk for a 512-bit masked_store whose stored value is a
// 128-bit register inserted at index 0 of an undef 512-bit vector.
//   InstrStr - instruction name prefix; "mrk" is pasted onto it.
//   _        - 128/256/512-bit type info bundle (info128 / info512 are used).
//   Mask     - dag that the store's mask operand must match; it is expected
//              to bind $mask, which the output pattern references.
//   MaskRC   - register class of $mask; the value is copied to VK1WM.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {
  def : Pat<
    (masked_store
        (_.info512.VT (insert_subvector undef,
                                        (_.info128.VT _.info128.RC:$src),
                                        (iPTR 0))),
        addr:$dst, Mask),
    (!cast<Instruction>(InstrStr#mrk)
        addr:$dst,
        (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
        _.info128.RC:$src)>;
}
4056
// Same source pattern as avx512_store_scalar_lowering, but for a mask held in
// a sub-register-sized class: $mask is first placed into an i32 value via
// INSERT_SUBREG on an IMPLICIT_DEF at index `subreg`, then copied to VK1WM.
//   InstrStr - instruction name prefix; "mrk" is pasted onto it.
//   _        - 128/256/512-bit type info bundle.
//   Mask     - dag that the store's mask operand must match (binds $mask).
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {
  def : Pat<
    (masked_store
        (_.info512.VT (insert_subvector undef,
                                        (_.info128.VT _.info128.RC:$src),
                                        (iPTR 0))),
        addr:$dst, Mask),
    (!cast<Instruction>(InstrStr#mrk)
        addr:$dst,
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        _.info128.RC:$src)>;
}
4071
// Matches the more recent clang codegen, which no longer emits a 512-bit
// masked store directly.  On AVX512F-only targets codegen widens the 128-bit
// masked store to 512 bits, so both shapes are handled here.
//   InstrStr - instruction name prefix; "mrk" is pasted onto it.
//   _        - 128/256/512-bit type info bundle.
//   Mask512  - mask dag for the widened (512-bit) store form (binds $mask).
//   Mask128  - mask dag for the native 128-bit store form (binds $mask).
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index used to place $mask into an i32 value.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
  // AVX512F pattern: store of a 128-bit value widened to 512 bits.
  def : Pat<
    (masked_store
        (_.info512.VT (insert_subvector undef,
                                        (_.info128.VT _.info128.RC:$src),
                                        (iPTR 0))),
        addr:$dst, Mask512),
    (!cast<Instruction>(InstrStr#mrk)
        addr:$dst,
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        _.info128.RC:$src)>;

  // AVX512VL pattern: the 128-bit masked store is matched as-is.
  def : Pat<
    (masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
    (!cast<Instruction>(InstrStr#mrk)
        addr:$dst,
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        _.info128.RC:$src)>;
}
4096
// Selects <InstrStr>rmkz / <InstrStr>rmk for the low 128 bits extracted from
// a 512-bit masked_load.
//   InstrStr - instruction name prefix; "rmkz" / "rmk" is pasted onto it.
//   _        - 128/256/512-bit type info bundle.
//   Mask     - dag that the load's mask operand must match (binds $mask).
//   MaskRC   - register class of $mask; the value is copied to VK1WM.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {
  // Pass-through operand is all zeros -> zero-masking form.
  def : Pat<
    (_.info128.VT
        (extract_subvector
            (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                       _.info512.ImmAllZerosV)),
            (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmkz)
        (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
        addr:$srcAddr)>;

  // Pass-through operand is X86vzmovl($src) inserted into an undef 512-bit
  // vector -> merge-masking form with $src as the first operand.
  def : Pat<
    (_.info128.VT
        (extract_subvector
            (_.info512.VT
                (masked_load addr:$srcAddr, Mask,
                    (_.info512.VT
                        (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
            (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmk)
        _.info128.RC:$src,
        (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
        addr:$srcAddr)>;
}
4119
// Same source patterns as avx512_load_scalar_lowering, but for a mask held in
// a sub-register-sized class: $mask is placed into an i32 value via
// INSERT_SUBREG on an IMPLICIT_DEF at index `subreg`, then copied to VK1WM.
//   InstrStr - instruction name prefix; "rmkz" / "rmk" is pasted onto it.
//   _        - 128/256/512-bit type info bundle.
//   Mask     - dag that the load's mask operand must match (binds $mask).
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {
  // All-zeros pass-through -> zero-masking load.
  def : Pat<
    (_.info128.VT
        (extract_subvector
            (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                       _.info512.ImmAllZerosV)),
            (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmkz)
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        addr:$srcAddr)>;

  // X86vzmovl($src) pass-through -> merge-masking load.
  def : Pat<
    (_.info128.VT
        (extract_subvector
            (_.info512.VT
                (masked_load addr:$srcAddr, Mask,
                    (_.info512.VT
                        (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
            (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmk)
        _.info128.RC:$src,
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        addr:$srcAddr)>;
}
4144
// Matches the more recent clang codegen, which no longer emits a 512-bit
// masked load directly.  On AVX512F-only targets codegen widens the 128-bit
// masked load to 512 bits, so both shapes are handled here.
//   InstrStr - instruction name prefix; "rmkz" / "rmk" is pasted onto it.
//   _        - 128/256/512-bit type info bundle.
//   Mask512  - mask dag for the widened (512-bit) load form (binds $mask).
//   Mask128  - mask dag for the native 128-bit load form (binds $mask).
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index used to place $mask into an i32 value.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
  // AVX512F patterns: low 128 bits of a 512-bit masked load.
  def : Pat<
    (_.info128.VT
        (extract_subvector
            (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                       _.info512.ImmAllZerosV)),
            (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmkz)
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        addr:$srcAddr)>;

  def : Pat<
    (_.info128.VT
        (extract_subvector
            (_.info512.VT
                (masked_load addr:$srcAddr, Mask512,
                    (_.info512.VT
                        (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
            (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmk)
        _.info128.RC:$src,
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        addr:$srcAddr)>;

  // AVX512VL patterns: the 128-bit masked load is matched as-is.
  def : Pat<
    (_.info128.VT (masked_load addr:$srcAddr, Mask128,
                               _.info128.ImmAllZerosV)),
    (!cast<Instruction>(InstrStr#rmkz)
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        addr:$srcAddr)>;

  def : Pat<
    (_.info128.VT (masked_load addr:$srcAddr, Mask128,
                               (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
    (!cast<Instruction>(InstrStr#rmk)
        _.info128.RC:$src,
        (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
            VK1WM),
        addr:$srcAddr)>;
}
4185
// Masked register-register scalar move lowerings.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores.  Each mask dag tests bit 0 of a general-purpose
// register ($mask) and converts the result to the mask vector type.
defm : avx512_store_scalar_lowering<
    "VMOVSSZ", avx512vl_f32_info,
    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
    GR32>;
defm : avx512_store_scalar_lowering_subreg<
    "VMOVSSZ", avx512vl_f32_info,
    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
    GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<
    "VMOVSDZ", avx512vl_f64_info,
    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
    GR8, sub_8bit>;

// Store forms where the mask is built by extracting the low bits of a v8i1
// (and, for the 512-bit variant, re-inserting them into a zero mask).
defm : avx512_store_scalar_lowering_subreg2<
    "VMOVSSZ", avx512vl_f32_info,
    // Mask512
    (v16i1 (insert_subvector
               (v16i1 immAllZerosV),
               (v4i1 (extract_subvector
                         (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                         (iPTR 0))),
               (iPTR 0))),
    // Mask128
    (v4i1 (extract_subvector
              (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
              (iPTR 0))),
    GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<
    "VMOVSDZ", avx512vl_f64_info,
    // Mask512
    (v8i1 (extract_subvector
              (v16i1 (insert_subvector
                         (v16i1 immAllZerosV),
                         (v2i1 (extract_subvector
                                   (v8i1 (bitconvert
                                             (i8 (and GR8:$mask, (i8 1))))),
                                   (iPTR 0))),
                         (iPTR 0))),
              (iPTR 0))),
    // Mask128
    (v2i1 (extract_subvector
              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
              (iPTR 0))),
    GR8, sub_8bit>;

// Masked scalar loads; the mask dags mirror the store instantiations above.
defm : avx512_load_scalar_lowering<
    "VMOVSSZ", avx512vl_f32_info,
    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
    GR32>;
defm : avx512_load_scalar_lowering_subreg<
    "VMOVSSZ", avx512vl_f32_info,
    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
    GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<
    "VMOVSDZ", avx512vl_f64_info,
    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
    GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<
    "VMOVSSZ", avx512vl_f32_info,
    // Mask512
    (v16i1 (insert_subvector
               (v16i1 immAllZerosV),
               (v4i1 (extract_subvector
                         (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                         (iPTR 0))),
               (iPTR 0))),
    // Mask128
    (v4i1 (extract_subvector
              (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
              (iPTR 0))),
    GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<
    "VMOVSDZ", avx512vl_f64_info,
    // Mask512
    (v8i1 (extract_subvector
              (v16i1 (insert_subvector
                         (v16i1 immAllZerosV),
                         (v2i1 (extract_subvector
                                   (v8i1 (bitconvert
                                             (i8 (and GR8:$mask, (i8 1))))),
                                   (iPTR 0))),
                         (iPTR 0))),
              (iPTR 0))),
    // Mask128
    (v2i1 (extract_subvector
              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
              (iPTR 0))),
    GR8, sub_8bit>;
4252
// Scalar X86selects on f32, implemented with the masked VMOVSS forms.  The
// scalar operands are copied into VR128X for the instruction and the result
// is copied back to FR32X.

// select(mask, reg, reg) -> VMOVSSZrrk
def : Pat<
  (f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
  (COPY_TO_REGCLASS
      (v4f32 (VMOVSSZrrk (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                         VK1WM:$mask,
                         (v4f32 (IMPLICIT_DEF)),
                         (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))),
      FR32X)>;

// select(mask, reg, 0.0) -> VMOVSSZrrkz
def : Pat<
  (f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
  (COPY_TO_REGCLASS
      (v4f32 (VMOVSSZrrkz VK1WM:$mask,
                          (v4f32 (IMPLICIT_DEF)),
                          (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))),
      FR32X)>;

// select(mask, load, reg) -> VMOVSSZrmk
def : Pat<
  (f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
  (COPY_TO_REGCLASS
      (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                         VK1WM:$mask,
                         addr:$src)),
      FR32X)>;

// select(mask, load, 0.0) -> VMOVSSZrmkz
def : Pat<
  (f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
  (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

// The same four shapes for f64, using the masked VMOVSD forms and FR64X.

// select(mask, reg, reg) -> VMOVSDZrrk
def : Pat<
  (f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
  (COPY_TO_REGCLASS
      (v2f64 (VMOVSDZrrk (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                         VK1WM:$mask,
                         (v2f64 (IMPLICIT_DEF)),
                         (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))),
      FR64X)>;

// select(mask, reg, 0.0) -> VMOVSDZrrkz
def : Pat<
  (f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
  (COPY_TO_REGCLASS
      (v2f64 (VMOVSDZrrkz VK1WM:$mask,
                          (v2f64 (IMPLICIT_DEF)),
                          (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))),
      FR64X)>;

// select(mask, load, reg) -> VMOVSDZrmk
def : Pat<
  (f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
  (COPY_TO_REGCLASS
      (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                         VK1WM:$mask,
                         addr:$src)),
      FR64X)>;

// select(mask, load, 0.0) -> VMOVSDZrmkz
def : Pat<
  (f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
  (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4288
// Register-register VMOVSS/VMOVSD definitions using opcode 0x11 with the
// MRMDestReg form.  They carry no selection patterns, are marked
// isCodeGenOnly, and set ForceDisassemble.  FoldGenData names the
// corresponding non-_REV instruction for each one.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // ---- vmovss ----
  def VMOVSSZrr_REV :
      AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins VR128X:$src1, VR128X:$src2),
             "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             []>,
      XS, EVEX_4V, VEX_LIG,
      FoldGenData<"VMOVSSZrr">,
      Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masked form: $src0 is tied to $dst.
  let Constraints = "$src0 = $dst" in {
    def VMOVSSZrrk_REV :
        AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
               (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                    VR128X:$src1, VR128X:$src2),
               "vmovss\t{$src2, $src1, $dst {${mask}}|"#
               "$dst {${mask}}, $src1, $src2}",
               []>,
        EVEX_K, XS, EVEX_4V, VEX_LIG,
        FoldGenData<"VMOVSSZrrk">,
        Sched<[SchedWriteFShuffle.XMM]>;
  }

  // Zero-masked form.
  def VMOVSSZrrkz_REV :
      AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
             "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
             "$dst {${mask}} {z}, $src1, $src2}",
             []>,
      EVEX_KZ, XS, EVEX_4V, VEX_LIG,
      FoldGenData<"VMOVSSZrrkz">,
      Sched<[SchedWriteFShuffle.XMM]>;

  // ---- vmovsd ----
  def VMOVSDZrr_REV :
      AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins VR128X:$src1, VR128X:$src2),
             "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             []>,
      XD, EVEX_4V, VEX_LIG, VEX_W,
      FoldGenData<"VMOVSDZrr">,
      Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masked form: $src0 is tied to $dst.
  let Constraints = "$src0 = $dst" in {
    def VMOVSDZrrk_REV :
        AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
               (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                    VR128X:$src1, VR128X:$src2),
               "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
               "$dst {${mask}}, $src1, $src2}",
               []>,
        EVEX_K, XD, EVEX_4V, VEX_LIG, VEX_W,
        FoldGenData<"VMOVSDZrrk">,
        Sched<[SchedWriteFShuffle.XMM]>;
  }

  // Zero-masked form.
  def VMOVSDZrrkz_REV :
      AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins f64x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
             "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
             "$dst {${mask}} {z}, $src1, $src2}",
             []>,
      EVEX_KZ, XD, EVEX_4V, VEX_LIG, VEX_W,
      FoldGenData<"VMOVSDZrrkz">,
      Sched<[SchedWriteFShuffle.XMM]>;
}
4341
// "vmovss.s" / "vmovsd.s" mnemonics map onto the _REV instructions defined
// for opcode 0x11.  The tied $src0 operand of the merge-masked forms is not
// listed in the alias result.
// NOTE(review): the trailing 0 is InstAlias's last argument; presumably it
// keeps the alias from being used when printing -- confirm against the
// InstAlias class definition.

// vmovss.s
def : InstAlias<
    "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2),
    0>;
def : InstAlias<
    "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
    "$dst {${mask}}, $src1, $src2}",
    (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask, VR128X:$src1, VR128X:$src2),
    0>;
def : InstAlias<
    "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
    "$dst {${mask}} {z}, $src1, $src2}",
    (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask, VR128X:$src1, VR128X:$src2),
    0>;

// vmovsd.s
def : InstAlias<
    "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2),
    0>;
def : InstAlias<
    "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
    "$dst {${mask}}, $src1, $src2}",
    (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask, VR128X:$src1, VR128X:$src2),
    0>;
def : InstAlias<
    "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
    "$dst {${mask}} {z}, $src1, $src2}",
    (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask, VR128X:$src1, VR128X:$src2),
    0>;
4362
// When optimizing for size, X86vzmovl of 32-bit-element vectors is selected
// as VMOVSSZrr with an all-zeros first operand (AVX512_128_SET0).  The wider
// types operate on the low xmm sub-register and wrap the result with
// SUBREG_TO_REG.
let Predicates = [HasAVX512, OptForSize] in {
  // 128-bit.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // 256-bit: move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v4f32 (VMOVSSZrr
                           (v4f32 (AVX512_128_SET0)),
                           (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src),
                                                  sub_xmm)))),
                sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v4i32 (VMOVSSZrr
                           (v4i32 (AVX512_128_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src),
                                                  sub_xmm)))),
                sub_xmm)>;

  // 512-bit.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v4f32 (VMOVSSZrr
                           (v4f32 (AVX512_128_SET0)),
                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src),
                                                  sub_xmm)))),
                sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v4i32 (VMOVSSZrr
                           (v4i32 (AVX512_128_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src),
                                                  sub_xmm)))),
                sub_xmm)>;
}
4388
// When optimizing for speed, prefer 128-bit blends: BLENDs have better
// throughput than VMOVSS/SD.  The trade-off is that these forms lose the
// ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  // f32 elements: VBLENDPSrri with immediate 1.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v4f32 (VBLENDPSrri
                           (v4f32 (V_SET0)),
                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src),
                                                  sub_xmm)),
                           (i8 1))),
                sub_xmm)>;

  // i32 elements: VPBLENDWrri with immediate 3.
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v4i32 (VPBLENDWrri
                           (v4i32 (V_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src),
                                                  sub_xmm)),
                           (i8 3))),
                sub_xmm)>;
}
4403
// Scalar FP loads placed into vectors are selected as VMOVSSZrm / VMOVSDZrm.
let Predicates = [HasAVX512] in {
  // 128-bit: scalar_to_vector of a plain scalar load.
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Zero-extending loads in the form they appear for 256-bit types: the
  // 128-bit load result is wrapped with SUBREG_TO_REG at sub_xmm.
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // The same, in the form they appear for 512-bit types.
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
4424
// Register-to-register "vmovq" (opcode 0x7E); its pattern selects it for
// X86vzmovl on v2i64.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in
def VMOVZPQILo2PQIZrr :
    AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src),
              "vmovq\t{$src, $dst|$dst, $src}",
              [(set VR128X:$dst,
                    (v2i64 (X86vzmovl (v2i64 VR128X:$src))))]>,
    EVEX, VEX_W;
4433
// Zero-extending scalar-to-vector moves and loads selected as the EVEX
// movd/movq instructions.
let Predicates = [HasAVX512] in {
  // GPR source.
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // The AVX 128-bit movd/movq instructions write zeros to the high 128-bit
  // part.
  def : Pat<(v2i64 (X86vzmovl
                       (v2i64 (scalar_to_vector
                                  (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0),
                           (v4i32 (VMOVDI2PDIZrm addr:$src)),
                           sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0),
                           (v2i64 (VMOVQI2PQIZrm addr:$src)),
                           sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0),
                           (v4i32 (VMOVDI2PDIZrm addr:$src)),
                           sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0),
                           (v2i64 (VMOVQI2PQIZrm addr:$src)),
                           sub_xmm)>;

  // 256-bit X86vzmovl with 64-bit elements: operate on the low xmm
  // sub-register with VMOVZPQILo2PQIZrr.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v2f64 (VMOVZPQILo2PQIZrr
                           (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src),
                                                  sub_xmm)))),
                sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v2i64 (VMOVZPQILo2PQIZrr
                           (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src),
                                                  sub_xmm)))),
                sub_xmm)>;

  // 512-bit X86vzmovl with 64-bit elements.
  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v2f64 (VMOVZPQILo2PQIZrr
                           (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src),
                                                  sub_xmm)))),
                sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG
                (i32 0),
                (v2i64 (VMOVZPQILo2PQIZrr
                           (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src),
                                                  sub_xmm)))),
                sub_xmm)>;
}
4483
4484//===----------------------------------------------------------------------===//
4485// AVX-512 - Non-temporals
4486//===----------------------------------------------------------------------===//
4487
// 512-bit "vmovntdqa" load (opcode 0x2A).  Defined with an empty pattern
// list; selection patterns that use it are written as standalone Pat records.
def VMOVNTDQAZrm :
    AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), (ins i512mem:$src),
             "vmovntdqa\t{$src, $dst|$dst, $src}",
             [], SSEPackedInt>,
    Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
    EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4492
// 256-bit and 128-bit "vmovntdqa" loads, available under HasVLX.  Both are
// defined with empty pattern lists.
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm :
      AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), (ins i256mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [], SSEPackedInt>,
      Sched<[SchedWriteVecMoveLS.YMM.RM]>,
      EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm :
      AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [], SSEPackedInt>,
      Sched<[SchedWriteVecMoveLS.XMM.RM]>,
      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4506
// Defines the memory-destination ("mr") form of a non-temporal store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   Sched     - scheduling bundle; its MR member becomes SchedRW.
//   st_frag   - store fragment to match (alignednontemporalstore by default).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in {
    def mr :
        AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(st_frag (_.VT _.RC:$src), addr:$dst)],
                 _.ExeDomain>,
        EVEX, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
4516
// Instantiates avx512_movnt at all three vector lengths: Z (512-bit) under
// HasAVX512, and Z256 / Z128 under HasAVX512 + HasVLX.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   VTInfo    - bundle with the info512 / info256 / info128 type descriptions.
//   Sched     - per-width scheduling bundle (ZMM / YMM / XMM members).
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>,
                EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>,
                EVEX_V128;
  }
}
4528
// Non-temporal store instructions at all three vector lengths.
// Integer form (opcode 0xE7), described with 64-bit integer element info.
defm VMOVNTDQ :
    avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                    SchedWriteVecMoveLSNT>,
    PD;

// Packed double form (opcode 0x2B, VEX_W set).
defm VMOVNTPD :
    avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                    SchedWriteFMoveLSNT>,
    PD, VEX_W;

// Packed single form (opcode 0x2B).
defm VMOVNTPS :
    avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                    SchedWriteFMoveLSNT>,
    PS;
4535
// 512-bit non-temporal patterns for the vector types not covered by the
// instruction definitions themselves.  Each foreach expands to one anonymous
// Pat per listed type, in list order.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  // Aligned non-temporal stores of the remaining integer types use
  // VMOVNTDQZmr.
  foreach NTVT = [v16i32, v32i16, v64i8] in
    def : Pat<(alignednontemporalstore (NTVT VR512:$src), addr:$dst),
              (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  // Aligned non-temporal loads of every 512-bit type use VMOVNTDQAZrm.
  foreach NTVT = [v8f64, v16f32, v8i64, v16i32, v32i16, v64i8] in
    def : Pat<(NTVT (alignednontemporalload addr:$src)),
              (VMOVNTDQAZrm addr:$src)>;
}
4557
// 256-bit and 128-bit non-temporal patterns, available under HasVLX.  Each
// foreach expands to one anonymous Pat per listed type, in list order.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores of the remaining integer types use VMOVNTDQZ256mr.
  foreach NTVT = [v8i32, v16i16, v32i8] in
    def : Pat<(alignednontemporalstore (NTVT VR256X:$src), addr:$dst),
              (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  // 256-bit loads of every type use VMOVNTDQAZ256rm.
  foreach NTVT = [v4f64, v8f32, v4i64, v8i32, v16i16, v32i8] in
    def : Pat<(NTVT (alignednontemporalload addr:$src)),
              (VMOVNTDQAZ256rm addr:$src)>;

  // 128-bit stores of the remaining integer types use VMOVNTDQZ128mr.
  foreach NTVT = [v4i32, v8i16, v16i8] in
    def : Pat<(alignednontemporalstore (NTVT VR128X:$src), addr:$dst),
              (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  // 128-bit loads of every type use VMOVNTDQAZ128rm.
  foreach NTVT = [v2f64, v4f32, v2i64, v4i32, v8i16, v16i8] in
    def : Pat<(NTVT (alignednontemporalload addr:$src)),
              (VMOVNTDQAZ128rm addr:$src)>;
}
4599
4600//===----------------------------------------------------------------------===//
4601// AVX-512 - Integer arithmetic
4602//
// Register-register ("rr") and register-memory ("rm") forms of a maskable
// integer binary operation, both built through AVX512_maskable.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - node matched by both forms.
//   _            - vector type info supplying RC, MemOp, VT and LdFrag.
//   sched        - scheduling class; rm uses its Folded / ReadAfterFold
//                  members.
//   IsCommutable - forwarded to both trailing AVX512_maskable arguments of
//                  the rr form only; the rm form does not pass it.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable, IsCommutable>,
            AVX512BIBase, EVEX_4V,
            Sched<[sched]>;

  // Second operand comes from a full-width vector load.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (_.LdFrag addr:$src2)))>,
            AVX512BIBase, EVEX_4V,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4620
// Extends avx512_binop_rm (rr and rm forms) with an rmb form whose second
// source is a scalar memory operand matched through _.BroadcastLdFrag.
// The rmb form appends _.BroadcastStr to the memory operand in both asm
// strings and adds EVEX_B to the encoding mixins.
// Parameters are the same as avx512_binop_rm; IsCommutable is only forwarded
// to the inherited avx512_binop_rm.
// String pastes use the single '#' operator, matching the rest of this file.
4621multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4622                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4623                            bit IsCommutable = 0> :
4624           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4625  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4626                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4627                  "${src2}"#_.BroadcastStr#", $src1",
4628                  "$src1, ${src2}"#_.BroadcastStr,
4629                  (_.VT (OpNode _.RC:$src1,
4630                                (_.BroadcastLdFrag addr:$src2)))>,
4631                  AVX512BIBase, EVEX_4V, EVEX_B,
4632                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4633}
4634
// Instantiates avx512_binop_rm (rr/rm, no broadcast form) at three vector
// widths taken from VTInfo:
//   Z    - info512, EVEX_V512, guarded by [prd].
//   Z256 - info256, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - info128, EVEX_V128, guarded by [prd, HasVLX].
// The matching width-specific member of sched (ZMM/YMM/XMM) is used for each.
4635multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4636                              AVX512VLVectorVTInfo VTInfo,
4637                              X86SchedWriteWidths sched, Predicate prd,
4638                              bit IsCommutable = 0> {
4639  let Predicates = [prd] in
4640    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4641                             IsCommutable>, EVEX_V512;
4642
4643  let Predicates = [prd, HasVLX] in {
4644    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4645                                sched.YMM, IsCommutable>, EVEX_V256;
4646    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4647                                sched.XMM, IsCommutable>, EVEX_V128;
4648  }
4649}
4650
// Same width expansion as avx512_binop_rm_vl, but built on avx512_binop_rmb,
// so each width gets rr, rm and the broadcast rmb form:
//   Z    - info512, EVEX_V512, guarded by [prd].
//   Z256 - info256, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - info128, EVEX_V128, guarded by [prd, HasVLX].
4651multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4652                               AVX512VLVectorVTInfo VTInfo,
4653                               X86SchedWriteWidths sched, Predicate prd,
4654                               bit IsCommutable = 0> {
4655  let Predicates = [prd] in
4656    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4657                             IsCommutable>, EVEX_V512;
4658
4659  let Predicates = [prd, HasVLX] in {
4660    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4661                                 sched.YMM, IsCommutable>, EVEX_V256;
4662    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4663                                 sched.XMM, IsCommutable>, EVEX_V128;
4664  }
4665}
4666
// 64-bit-element wrapper: expands avx512_binop_rmb_vl (rr/rm/rmb at all three
// widths) over avx512vl_i64_info and adds VEX_W and EVEX_CD8<64, CD8VF>.
4667multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668                                X86SchedWriteWidths sched, Predicate prd,
4669                                bit IsCommutable = 0> {
4670  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4671                                  sched, prd, IsCommutable>,
4672                                  VEX_W, EVEX_CD8<64, CD8VF>;
4673}
4674
// 32-bit-element wrapper: expands avx512_binop_rmb_vl (rr/rm/rmb at all three
// widths) over avx512vl_i32_info and adds EVEX_CD8<32, CD8VF>. Unlike the
// 64-bit wrapper, no VEX_W mixin is added here.
4675multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4676                                X86SchedWriteWidths sched, Predicate prd,
4677                                bit IsCommutable = 0> {
4678  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4679                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4680}
4681
// 16-bit-element wrapper: expands avx512_binop_rm_vl over avx512vl_i16_info
// and adds EVEX_CD8<16, CD8VF> and VEX_WIG. It is built on the non-broadcast
// multiclass, so only rr and rm forms are produced (no rmb).
4682multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683                                X86SchedWriteWidths sched, Predicate prd,
4684                                bit IsCommutable = 0> {
4685  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4686                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4687                                 VEX_WIG;
4688}
4689
// 8-bit-element wrapper: expands avx512_binop_rm_vl over avx512vl_i8_info
// and adds EVEX_CD8<8, CD8VF> and VEX_WIG. It is built on the non-broadcast
// multiclass, so only rr and rm forms are produced (no rmb).
4690multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4691                                X86SchedWriteWidths sched, Predicate prd,
4692                                bit IsCommutable = 0> {
4693  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4694                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4695                                 VEX_WIG;
4696}
4697
// Defines both the dword (D) and qword (Q) variants of an operation.
//   opc_d / opc_q - opcode bytes for the D and Q variants respectively.
//   OpcodeStr     - mnemonic stem; "d" or "q" is appended per variant.
// The remaining parameters are forwarded unchanged to avx512_binop_rm_vl_d
// and avx512_binop_rm_vl_q.
4698multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4699                                 SDNode OpNode, X86SchedWriteWidths sched,
4700                                 Predicate prd, bit IsCommutable = 0> {
4701  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4702                                   IsCommutable>;
4703
4704  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4705                                   IsCommutable>;
4706}
4707
// Defines both the byte (B) and word (W) variants of an operation.
//   opc_b / opc_w - opcode bytes for the B and W variants respectively.
//   OpcodeStr     - mnemonic stem; "b" or "w" is appended per variant.
// The remaining parameters are forwarded unchanged to avx512_binop_rm_vl_b
// and avx512_binop_rm_vl_w.
4708multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4709                                 SDNode OpNode, X86SchedWriteWidths sched,
4710                                 Predicate prd, bit IsCommutable = 0> {
4711  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4712                                   IsCommutable>;
4713
4714  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4715                                   IsCommutable>;
4716}
4717
// Defines an operation for all four integer element sizes.
// The D and Q variants are guarded by HasAVX512; the B and W variants are
// guarded by HasBWI. Opcode arguments are given in the order b, w, d, q.
4718multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4719                                  bits<8> opc_d, bits<8> opc_q,
4720                                  string OpcodeStr, SDNode OpNode,
4721                                  X86SchedWriteWidths sched,
4722                                  bit IsCommutable = 0> {
4723  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4724                                    sched, HasAVX512, IsCommutable>,
4725              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4726                                    sched, HasBWI, IsCommutable>;
4727}
4728
// rr, rm and rmb forms of a two-operand operation whose source, destination
// and broadcast vector types may differ.
//   _Src   - type info for both register sources and the full-vector load.
//   _Dst   - type info for the result (and for AVX512_maskable).
//   _Brdct - type info for the broadcast form: supplies ScalarMemOp,
//            BroadcastStr and BroadcastLdFrag; the broadcast value is
//            bitconverted before being passed to OpNode.
//   IsCommutable - forwarded once to AVX512_maskable for the rr form only
//            (avx512_binop_rm passes it twice).
// String pastes use the single '#' operator, matching the rest of this file.
4729multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4730                            X86FoldableSchedWrite sched,
4731                            SDNode OpNode,X86VectorVTInfo _Src,
4732                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4733                            bit IsCommutable = 0> {
4734  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4735                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4736                            "$src2, $src1","$src1, $src2",
4737                            (_Dst.VT (OpNode
4738                                         (_Src.VT _Src.RC:$src1),
4739                                         (_Src.VT _Src.RC:$src2))),
4740                            IsCommutable>,
4741                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
4742  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4743                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4744                        "$src2, $src1", "$src1, $src2",
4745                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4746                                      (_Src.LdFrag addr:$src2)))>,
4747                        AVX512BIBase, EVEX_4V,
4748                        Sched<[sched.Folded, sched.ReadAfterFold]>;
4749
4750  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4751                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4752                    OpcodeStr,
4753                    "${src2}"#_Brdct.BroadcastStr#", $src1",
4754                     "$src1, ${src2}"#_Brdct.BroadcastStr,
4755                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4756                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4757                    AVX512BIBase, EVEX_4V, EVEX_B,
4758                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4759}
4760
// Integer arithmetic instantiations. For each defm the arguments are the
// opcode byte(s), the mnemonic (stem), the SDNode to match, the scheduling
// class, the feature predicate where the multiclass takes one, and the
// IsCommutable flag. add, the saturating adds, the multiplies and the average
// pass 1; the subtract variants pass 0. Trailing mixins (T8PD,
// NotEVEX2VEXConvertible) are applied to every record the defm produces.
4761defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4762                                    SchedWriteVecALU, 1>;
4763defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4764                                    SchedWriteVecALU, 0>;
4765defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4766                                    SchedWriteVecALU, HasBWI, 1>;
4767defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4768                                    SchedWriteVecALU, HasBWI, 0>;
4769defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4770                                     SchedWriteVecALU, HasBWI, 1>;
4771defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4772                                     SchedWriteVecALU, HasBWI, 0>;
4773defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4774                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
4775defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4776                                    SchedWriteVecIMul, HasBWI, 1>;
4777defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4778                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
4779                                    NotEVEX2VEXConvertible;
4780defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4781                                    HasBWI, 1>;
4782defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4783                                     HasBWI, 1>;
4784defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4785                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
4786defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4787                                   SchedWriteVecALU, HasBWI, 1>;
4788defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4789                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4790defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4791                                     SchedWriteVecIMul, HasAVX512, 1>;
4792
// Expands avx512_binop_rm2 at 512/256/128-bit widths with separate source and
// destination type families. The broadcast type is always the 64-bit-element
// info for that width (v8i64_info / v4i64x_info / v2i64x_info), and every
// width adds EVEX_CD8<64, CD8VF> and VEX_W.
// The 512-bit form is guarded by [prd]; the 256/128-bit forms by
// [HasVLX, prd].
4793multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4794                            X86SchedWriteWidths sched,
4795                            AVX512VLVectorVTInfo _SrcVTInfo,
4796                            AVX512VLVectorVTInfo _DstVTInfo,
4797                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4798  let Predicates = [prd] in
4799    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4800                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4801                                 v8i64_info, IsCommutable>,
4802                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4803  let Predicates = [HasVLX, prd] in {
4804    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4805                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4806                                      v4i64x_info, IsCommutable>,
4807                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4808    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4809                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4810                                      v2i64x_info, IsCommutable>,
4811                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4812  }
4813}
4814
// VPMULTISHIFTQB: byte-element source and destination types, matched via
// X86multishift, guarded by HasVBMI and not commutable. Through
// avx512_binop_all the broadcast form uses the 64-bit-element type info.
4815defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4816                                avx512vl_i8_info, avx512vl_i8_info,
4817                                X86multishift, HasVBMI, 0>, T8PD;
4818
// Broadcast-memory (rmb) form for operations whose result type (_Dst)
// differs from the source type (_Src). The scalar memory operand is matched
// through _Src.BroadcastLdFrag and bitconverted before being passed to
// OpNode. Adds EVEX_B and EVEX_CD8<_Src.EltSize, CD8VF>.
// This multiclass adds no opcode-map base class itself; the instantiations
// below supply one at the defm site.
// String pastes use the single '#' operator, matching the rest of this file.
4819multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4820                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4821                            X86FoldableSchedWrite sched> {
4822  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4823                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4824                    OpcodeStr,
4825                    "${src2}"#_Src.BroadcastStr#", $src1",
4826                     "$src1, ${src2}"#_Src.BroadcastStr,
4827                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4828                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4829                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4830                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4831}
4832
// rr and rm forms for operations whose result type (_Dst) differs from the
// source type (_Src). Both forms add EVEX_CD8<_Src.EltSize, CD8VF>.
// IsCommutable is forwarded (twice) to AVX512_maskable for the rr form only.
// This multiclass adds no opcode-map base class itself; the instantiations
// below supply one at the defm site.
4833multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4834                            SDNode OpNode,X86VectorVTInfo _Src,
4835                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4836                            bit IsCommutable = 0> {
4837  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4838                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4839                            "$src2, $src1","$src1, $src2",
4840                            (_Dst.VT (OpNode
4841                                         (_Src.VT _Src.RC:$src1),
4842                                         (_Src.VT _Src.RC:$src2))),
4843                            IsCommutable, IsCommutable>,
4844                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4845  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4846                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4847                        "$src2, $src1", "$src1, $src2",
4848                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4849                                      (_Src.LdFrag addr:$src2)))>,
4850                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4851                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4852}
4853
// Dword-source to word-result forms at all three widths. Each width combines
// avx512_packs_rm (rr/rm) with avx512_packs_rmb (broadcast), scheduled on
// SchedWriteShuffle. The 512-bit form requires HasBWI; the 256/128-bit forms
// require HasBWI and HasVLX.
4854multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4855                                    SDNode OpNode> {
4856  let Predicates = [HasBWI] in
4857  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4858                                 v32i16_info, SchedWriteShuffle.ZMM>,
4859                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4860                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4861  let Predicates = [HasBWI, HasVLX] in {
4862    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4863                                     v16i16x_info, SchedWriteShuffle.YMM>,
4864                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4865                                      v16i16x_info, SchedWriteShuffle.YMM>,
4866                                      EVEX_V256;
4867    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4868                                     v8i16x_info, SchedWriteShuffle.XMM>,
4869                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4870                                      v8i16x_info, SchedWriteShuffle.XMM>,
4871                                      EVEX_V128;
4872  }
4873}
// Word-source to byte-result forms at all three widths. Only
// avx512_packs_rm is used, so there is no broadcast (rmb) form, and every
// width adds VEX_WIG. The 512-bit form requires HasBWI; the 256/128-bit forms
// require HasBWI and HasVLX.
4874multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4875                            SDNode OpNode> {
4876  let Predicates = [HasBWI] in
4877  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4878                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4879  let Predicates = [HasBWI, HasVLX] in {
4880    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4881                                     v32i8x_info, SchedWriteShuffle.YMM>,
4882                                     EVEX_V256, VEX_WIG;
4883    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4884                                     v16i8x_info, SchedWriteShuffle.XMM>,
4885                                     EVEX_V128, VEX_WIG;
4886  }
4887}
4888
// Multiply-add style operations with differing source (_Src) and result
// (_Dst) type families. Reuses avx512_packs_rm (rr/rm only, no broadcast
// form) at all three widths, scheduled on SchedWriteVecIMul. The 512-bit
// form requires HasBWI; the 256/128-bit forms require HasBWI and HasVLX.
4889multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4890                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4891                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4892  let Predicates = [HasBWI] in
4893  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4894                                _Dst.info512, SchedWriteVecIMul.ZMM,
4895                                IsCommutable>, EVEX_V512;
4896  let Predicates = [HasBWI, HasVLX] in {
4897    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4898                                     _Dst.info256, SchedWriteVecIMul.YMM,
4899                                     IsCommutable>, EVEX_V256;
4900    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4901                                     _Dst.info128, SchedWriteVecIMul.XMM,
4902                                     IsCommutable>, EVEX_V128;
4903  }
4904}
4905
// Pack instantiations. The opcode-map base class is supplied here because
// the avx512_packs_* multiclasses do not add one. VPACKUSDW uses AVX5128IBase
// where the other three use AVX512BIBase.
// NOTE(review): AVX5128IBase is defined outside this chunk; it is presumably
// the 0F38-map counterpart of AVX512BIBase -- confirm in X86InstrFormats.td.
4906defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4907defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4908defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4909defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4910
// Multiply-add instantiations. VPMADDWD passes IsCommutable = 1; VPMADDUBSW
// leaves it at the default of 0.
4911defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4912                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4913defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4914                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4915
// Integer min/max instantiations, matched to the generic smax/umax/smin/umin
// SDNodes and all marked commutable. Byte and word variants require HasBWI;
// dword and qword variants require HasAVX512. The qword variants are
// additionally tagged NotEVEX2VEXConvertible.

// Signed maximum.
4916defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4917                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4918defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4919                                    SchedWriteVecALU, HasBWI, 1>;
4920defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4921                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4922defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4923                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4924                                    NotEVEX2VEXConvertible;
4925
// Unsigned maximum.
4926defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4927                                    SchedWriteVecALU, HasBWI, 1>;
4928defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4929                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4930defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4931                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4932defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4933                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4934                                    NotEVEX2VEXConvertible;
4935
// Signed minimum.
4936defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4937                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4938defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4939                                    SchedWriteVecALU, HasBWI, 1>;
4940defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4941                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4942defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4943                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4944                                    NotEVEX2VEXConvertible;
4945
// Unsigned minimum.
4946defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4947                                    SchedWriteVecALU, HasBWI, 1>;
4948defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4949                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4950defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4951                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4952defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4953                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4954                                    NotEVEX2VEXConvertible;
4955
4956// PMULLQ: Use the 512-bit instruction to implement 128/256-bit multiplies
// when DQI is available but VLX is not (NoVLX).
// Each register source is placed in the low subregister of an otherwise
// undefined v8i64 (INSERT_SUBREG into IMPLICIT_DEF), the 512-bit VPMULLQ is
// issued, and the low subregister of the result is extracted. The broadcast
// patterns pass the address straight to the 512-bit rmb form.
4957let Predicates = [HasDQI, NoVLX] in {
  // 256-bit: register-register and register-broadcast.
4958  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4959            (EXTRACT_SUBREG
4960                (VPMULLQZrr
4961                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4962                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4963             sub_ymm)>;
4964  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4965            (EXTRACT_SUBREG
4966                (VPMULLQZrmb
4967                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4968                    addr:$src2),
4969             sub_ymm)>;
4970
  // 128-bit: register-register and register-broadcast.
4971  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4972            (EXTRACT_SUBREG
4973                (VPMULLQZrr
4974                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4975                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4976             sub_xmm)>;
4977  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4978            (EXTRACT_SUBREG
4979                (VPMULLQZrmb
4980                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4981                    addr:$src2),
4982             sub_xmm)>;
4983}
4984
// Lowers 256-bit (v4i64) and 128-bit (v2i64) uses of OpNode onto a 512-bit
// instruction.
//   Instr  - name stem of the 512-bit instruction; "rr" or "rmb" is appended
//            and the result is looked up with !cast<Instruction>.
//   OpNode - SDNode to match.
// Register sources are inserted into the low subregister of an undefined
// v8i64 and the low subregister of the result is extracted. The broadcast
// patterns pass the address straight to the 512-bit rmb form.
4985multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit: register-register and register-broadcast.
4986  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4987            (EXTRACT_SUBREG
4988                (!cast<Instruction>(Instr#"rr")
4989                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4990                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4991             sub_ymm)>;
4992  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4993            (EXTRACT_SUBREG
4994                (!cast<Instruction>(Instr#"rmb")
4995                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4996                    addr:$src2),
4997             sub_ymm)>;
4998
  // 128-bit: register-register and register-broadcast.
4999  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5000            (EXTRACT_SUBREG
5001                (!cast<Instruction>(Instr#"rr")
5002                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5003                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5004             sub_xmm)>;
5005  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5006            (EXTRACT_SUBREG
5007                (!cast<Instruction>(Instr#"rmb")
5008                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5009                    addr:$src2),
5010             sub_xmm)>;
5011}
5012
// Without VLX, implement the 128/256-bit qword min/max operations with the
// 512-bit instructions via avx512_min_max_lowering.
5013let Predicates = [HasAVX512, NoVLX] in {
5014  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5015  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5016  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5017  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5018}
5019
5020//===----------------------------------------------------------------------===//
5021// AVX-512  Logical Instructions
5022//===----------------------------------------------------------------------===//
5023
// Bitwise logic instantiations. Each is defined only in dword (D) and qword
// (Q) variants, and both variants share one opcode byte. and/or/xor are
// commutable; VPANDN leaves IsCommutable at its default of 0.
5024defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5025                                   SchedWriteVecLogic, HasAVX512, 1>;
5026defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5027                                  SchedWriteVecLogic, HasAVX512, 1>;
5028defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5029                                   SchedWriteVecLogic, HasAVX512, 1>;
5030defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5031                                    SchedWriteVecLogic, HasAVX512>;
5032
// The logic instructions above exist only in D and Q variants, so unmasked
// logic operations on byte- and word-element 128/256-bit vectors are selected
// to the Q-suffixed instructions here.
// NOTE(review): the load patterns carry no explicit result type; it is
// presumably inferred from the typed load fragment -- confirm.
5033let Predicates = [HasVLX] in {
  // 128-bit, register-register.
5034  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5035            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5036  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5037            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5038
5039  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5040            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5041  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5042            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5043
5044  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5045            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5046  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5047            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5048
5049  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5050            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5051  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5052            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5053
  // 128-bit, register-memory.
5054  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5055            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5056  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5057            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5058
5059  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5060            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5061  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5062            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5063
5064  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5065            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5066  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5067            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5068
5069  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5070            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5071  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5072            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5073
  // 256-bit, register-register.
5074  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5075            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5076  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5077            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5078
5079  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5080            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5081  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5082            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5083
5084  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5085            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5086  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5087            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5088
5089  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5090            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5091  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5092            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5093
  // 256-bit, register-memory.
5094  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5095            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5096  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5097            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5098
5099  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5100            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5101  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5102            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5103
5104  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5105            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5106  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5107            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5108
5109  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5110            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5111  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5112            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5113}
5114
// 512-bit counterpart of the byte/word logic patterns: unmasked logic
// operations on v64i8 and v32i16 are selected to the Q-suffixed 512-bit
// instructions, since only D and Q variants are defined.
5115let Predicates = [HasAVX512] in {
  // Register-register.
5116  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5117            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5118  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5119            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5120
5121  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5122            (VPORQZrr VR512:$src1, VR512:$src2)>;
5123  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5124            (VPORQZrr VR512:$src1, VR512:$src2)>;
5125
5126  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5127            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5128  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5129            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5130
5131  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5132            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5133  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5134            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5135
  // Register-memory.
5136  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5137            (VPANDQZrm VR512:$src1, addr:$src2)>;
5138  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5139            (VPANDQZrm VR512:$src1, addr:$src2)>;
5140
5141  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5142            (VPORQZrm VR512:$src1, addr:$src2)>;
5143  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5144            (VPORQZrm VR512:$src1, addr:$src2)>;
5145
5146  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5147            (VPXORQZrm VR512:$src1, addr:$src2)>;
5148  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5149            (VPXORQZrm VR512:$src1, addr:$src2)>;
5150
5151  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5152            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5153  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5154            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5155}
5156
5157// Patterns to catch a vselect whose type differs from that of the logic op.
//   InstrStr - instruction name stem; rrk/rrkz/rmk/rmkz is appended and the
//              result is looked up with !cast<Instruction>.
//   OpNode   - logic SDNode to match.
//   _        - type info of the vselect (result VT, RC and mask class KRCWM).
//   IntInfo  - type info of the logic operation, which is bitconverted to
//              _.VT before the select.
// A select against $src0 maps to the merge-masked (k) form; a select against
// _.ImmAllZerosV maps to the zero-masked (kz) form.
5158multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5159                                    X86VectorVTInfo _,
5160                                    X86VectorVTInfo IntInfo> {
5161  // Masked register-register logical operations.
5162  def : Pat<(_.VT (vselect _.KRCWM:$mask,
5163                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5164                   _.RC:$src0)),
5165            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5166             _.RC:$src1, _.RC:$src2)>;
5167
5168  def : Pat<(_.VT (vselect _.KRCWM:$mask,
5169                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5170                   _.ImmAllZerosV)),
5171            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5172             _.RC:$src2)>;
5173
5174  // Masked register-memory logical operations.
5175  def : Pat<(_.VT (vselect _.KRCWM:$mask,
5176                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5177                                            (load addr:$src2)))),
5178                   _.RC:$src0)),
5179            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5180             _.RC:$src1, addr:$src2)>;
5181  def : Pat<(_.VT (vselect _.KRCWM:$mask,
5182                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5183                                            (load addr:$src2)))),
5184                   _.ImmAllZerosV)),
5185            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5186             addr:$src2)>;
5187}
5188
// Broadcast-memory counterpart of avx512_logical_lowering: matches a vselect
// over a bitconverted logic op whose second operand is a broadcast load in
// the logic op's own type (IntInfo.BroadcastLdFrag).
// A select against $src0 maps to the rmbk form; a select against
// _.ImmAllZerosV maps to the rmbkz form.
5189multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5190                                         X86VectorVTInfo _,
5191                                         X86VectorVTInfo IntInfo> {
5192  // Register-broadcast logical operations.
5193  def : Pat<(_.VT (vselect _.KRCWM:$mask,
5194                   (bitconvert
5195                    (IntInfo.VT (OpNode _.RC:$src1,
5196                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5197                   _.RC:$src0)),
5198            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5199             _.RC:$src1, addr:$src2)>;
5200  def : Pat<(_.VT (vselect _.KRCWM:$mask,
5201                   (bitconvert
5202                    (IntInfo.VT (OpNode _.RC:$src1,
5203                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5204                   _.ImmAllZerosV)),
5205            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5206             _.RC:$src1, addr:$src2)>;
5207}
5208
// Instantiates avx512_logical_lowering for the three vector widths of a
// (SelectInfo, IntInfo) pair. InstrStr is extended with "Z128"/"Z256"/"Z"
// to form the per-width instruction name prefix.
5209multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5210                                         AVX512VLVectorVTInfo SelectInfo,
5211                                         AVX512VLVectorVTInfo IntInfo> {
    // 128-bit and 256-bit patterns are predicated on HasVLX.
5212let Predicates = [HasVLX] in {
5213  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5214                                 IntInfo.info128>;
5215  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5216                                 IntInfo.info256>;
5217}
    // 512-bit patterns are predicated on HasAVX512.
5218let Predicates = [HasAVX512] in {
5219  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5220                                 IntInfo.info512>;
5221}
5222}
5223
// Instantiates avx512_logical_lowering_bcast for the three vector widths of
// a (SelectInfo, IntInfo) pair, appending "Z128"/"Z256"/"Z" to InstrStr.
5224multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5225                                               AVX512VLVectorVTInfo SelectInfo,
5226                                               AVX512VLVectorVTInfo IntInfo> {
    // 128-bit and 256-bit patterns are predicated on HasVLX.
5227let Predicates = [HasVLX] in {
5228  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5229                                       SelectInfo.info128, IntInfo.info128>;
5230  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5231                                       SelectInfo.info256, IntInfo.info256>;
5232}
    // 512-bit patterns are predicated on HasAVX512.
5233let Predicates = [HasAVX512] in {
5234  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5235                                       SelectInfo.info512, IntInfo.info512>;
5236}
5237}
5238
// Enumerates the (select type, logic-op type) combinations for which the
// mismatched-type masked patterns are generated. The "Q" or "D" suffix
// appended to InstrStr follows the select type's element width (i64/f64 use
// "Q", i32/f32 use "D"), independent of the logic op's type.
5239multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5240  // i64 vselect with i32/i16/i8 logic op
5241  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5242                                       avx512vl_i32_info>;
5243  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5244                                       avx512vl_i16_info>;
5245  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5246                                       avx512vl_i8_info>;
5247
5248  // i32 vselect with i64/i16/i8 logic op
5249  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5250                                       avx512vl_i64_info>;
5251  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5252                                       avx512vl_i16_info>;
5253  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5254                                       avx512vl_i8_info>;
5255
5256  // f32 vselect with i64/i32/i16/i8 logic op
5257  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5258                                       avx512vl_i64_info>;
5259  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5260                                       avx512vl_i32_info>;
5261  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5262                                       avx512vl_i16_info>;
5263  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5264                                       avx512vl_i8_info>;
5265
5266  // f64 vselect with i64/i32/i16/i8 logic op
5267  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5268                                       avx512vl_i64_info>;
5269  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5270                                       avx512vl_i32_info>;
5271  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5272                                       avx512vl_i16_info>;
5273  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5274                                       avx512vl_i8_info>;
5275
      // Broadcast-load forms: only f32 select with an i32 logic op and f64
      // select with an i64 logic op are instantiated here.
5276  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5277                                             avx512vl_f32_info,
5278                                             avx512vl_i32_info>;
5279  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5280                                             avx512vl_f64_info,
5281                                             avx512vl_i64_info>;
5282}
5283
// Generate the mismatched-type masked lowering patterns for each logic node,
// pairing it with the instruction name prefix the patterns select.
5284defm : avx512_logical_lowering_types<"VPAND", and>;
5285defm : avx512_logical_lowering_types<"VPOR",  or>;
5286defm : avx512_logical_lowering_types<"VPXOR", xor>;
5287defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5288
5289//===----------------------------------------------------------------------===//
5290// AVX-512  FP arithmetic
5291//===----------------------------------------------------------------------===//
5292
// Defines the scalar FP binary-op records for one element type:
//   rr_Int / rm_Int - maskable forms on the vector register class _.RC,
//                     matched through VecNode.
//   rr / rm         - isCodeGenOnly forms on the scalar class _.FRC, matched
//                     through OpNode and predicated on HasAVX512.
// Every record gets ExeDomain from _, Uses = [MXCSR] and
// mayRaiseFPException = 1. IsCommutable is applied to the rr record only.
5293multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5294                            SDNode OpNode, SDNode VecNode,
5295                            X86FoldableSchedWrite sched, bit IsCommutable> {
5296  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5297  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5298                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5299                           "$src2, $src1", "$src1, $src2",
5300                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5301                           Sched<[sched]>;
5302
      // Memory form: the second operand is matched with _.ScalarIntMemCPat.
5303  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5304                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5305                         "$src2, $src1", "$src1, $src2",
5306                         (_.VT (VecNode _.RC:$src1,
5307                                        _.ScalarIntMemCPat:$src2))>,
5308                         Sched<[sched.Folded, sched.ReadAfterFold]>;
      // Scalar-register-class forms used only by code generation.
5309  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5310  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5311                         (ins _.FRC:$src1, _.FRC:$src2),
5312                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5313                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5314                          Sched<[sched]> {
5315    let isCommutable = IsCommutable;
5316  }
5317  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5318                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5319                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5320                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5321                         (_.ScalarLdFrag addr:$src2)))]>,
5322                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5323  }
5324  }
5325}
5326
// Defines rrb_Int: the maskable scalar register-register form that takes an
// explicit rounding-control operand (AVX512RC:$rc), matched through VecNode
// with the control as an i32 timm. The record is tagged EVEX_B and EVEX_RC.
// NOTE(review): IsCommutable is accepted but not referenced in this body.
5327multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5328                                  SDNode VecNode, X86FoldableSchedWrite sched,
5329                                  bit IsCommutable = 0> {
5330  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5331  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5332                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5333                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5334                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5335                          (i32 timm:$rc))>,
5336                          EVEX_B, EVEX_RC, Sched<[sched]>;
5337}
// Scalar FP binary-op records for operations that have an {sae} form rather
// than a rounding-control form:
//   rr_Int / rm_Int - maskable forms on _.RC matched through VecNode, tagged
//                     SIMD_EXC.
//   rr / rm         - isCodeGenOnly forms on _.FRC matched through OpNode,
//                     with Uses = [MXCSR], mayRaiseFPException = 1 and an
//                     EVEX2VEXOverride built from EVEX2VexOvrd # "rr"/"rm".
//   rrb_Int         - maskable "{sae}" form matched through SaeNode, tagged
//                     EVEX_B, with Uses = [MXCSR].
// IsCommutable is applied to the rr record only.
5338multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5339                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5340                                X86FoldableSchedWrite sched, bit IsCommutable,
5341                                string EVEX2VexOvrd> {
5342  let ExeDomain = _.ExeDomain in {
5343  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5344                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5345                           "$src2, $src1", "$src1, $src2",
5346                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5347                           Sched<[sched]>, SIMD_EXC;
5348
5349  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5350                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5351                         "$src2, $src1", "$src1, $src2",
5352                         (_.VT (VecNode _.RC:$src1,
5353                                        _.ScalarIntMemCPat:$src2))>,
5354                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5355
      // Scalar-register-class forms used only by code generation.
5356  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5357      Uses = [MXCSR], mayRaiseFPException = 1 in {
5358  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5359                         (ins _.FRC:$src1, _.FRC:$src2),
5360                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5361                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5362                          Sched<[sched]>,
5363                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5364    let isCommutable = IsCommutable;
5365  }
5366  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5367                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5368                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5369                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5370                         (_.ScalarLdFrag addr:$src2)))]>,
5371                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5372                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5373  }
5374
      // {sae} register form; note it sets Uses = [MXCSR] but, unlike the
      // records above, does not set mayRaiseFPException here.
5375  let Uses = [MXCSR] in
5376  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5377                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5378                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5379                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5380                            EVEX_B, Sched<[sched]>;
5381  }
5382}
5383
// Builds the single-precision (SSZ, "ss", f32x_info) and double-precision
// (SDZ, "sd", f64x_info) scalar records of a binary op by combining
// avx512_fp_scalar (OpNode/VecNode) with avx512_fp_scalar_round (RndNode).
// The SDZ records additionally carry VEX_W and use a 64-bit EVEX_CD8.
5384multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5385                                SDNode VecNode, SDNode RndNode,
5386                                X86SchedWriteSizes sched, bit IsCommutable> {
5387  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5388                              sched.PS.Scl, IsCommutable>,
5389             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5390                              sched.PS.Scl, IsCommutable>,
5391                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5392  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5393                              sched.PD.Scl, IsCommutable>,
5394             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5395                              sched.PD.Scl, IsCommutable>,
5396                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5397}
5398
// Builds the SSZ ("ss", f32x_info) and SDZ ("sd", f64x_info) scalar records
// of a binary op from avx512_fp_scalar_sae. The EVEX2VEX override name
// prefix passed down is NAME#"SS" / NAME#"SD".
5399multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5400                              SDNode VecNode, SDNode SaeNode,
5401                              X86SchedWriteSizes sched, bit IsCommutable> {
5402  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5403                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5404                              NAME#"SS">,
5405                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5406  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5407                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5408                              NAME#"SD">,
5409                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5410}
// Scalar FP arithmetic instantiations. add/mul/sub/div use the
// rounding-control variant; min/max use the {sae} variant. Only add and mul
// pass IsCommutable = 1.
5411defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5412                                 SchedWriteFAddSizes, 1>;
5413defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5414                                 SchedWriteFMulSizes, 1>;
5415defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5416                                 SchedWriteFAddSizes, 0>;
5417defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5418                                 SchedWriteFDivSizes, 0>;
5419defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5420                               SchedWriteFCmpSizes, 0>;
5421defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5422                               SchedWriteFCmpSizes, 0>;
5423
5424// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5425// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Defines isCodeGenOnly scalar rr/rm records on _.FRC for a commutable
// variant of a binary op: rr is marked isCommutable = 1, and both records
// carry an EVEX2VEXOverride built from EVEX2VEXOvrd # "rr"/"rm".
5426multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5427                                    X86VectorVTInfo _, SDNode OpNode,
5428                                    X86FoldableSchedWrite sched,
5429                                    string EVEX2VEXOvrd> {
5430  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5431  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5432                         (ins _.FRC:$src1, _.FRC:$src2),
5433                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5434                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5435                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5436    let isCommutable = 1;
5437  }
      // Memory form; isCommutable is not set on this record.
5438  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5439                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5440                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5441                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5442                         (_.ScalarLdFrag addr:$src2)))]>,
5443                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5444                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5445  }
5446}
// Commutable scalar min/max records, matched through X86fminc / X86fmaxc.
// They reuse the vminss/vminsd/vmaxss/vmaxsd mnemonics and the 0x5D / 0x5F
// opcodes of the non-commutable VMIN / VMAX definitions.
5447defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5448                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5449                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5450
5451defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5452                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5453                                         VEX_W, EVEX_4V, VEX_LIG,
5454                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5455
5456defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5457                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5458                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5459
5460defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5461                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5462                                         VEX_W, EVEX_4V, VEX_LIG,
5463                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5464
// Defines the maskable packed FP binary-op records for one vector type:
//   rr  - register-register.
//   rm  - register-memory, second operand loaded with _.LdFrag.
//   rmb - register-broadcast, second operand loaded with _.BroadcastLdFrag;
//         tagged EVEX_B and printed with _.BroadcastStr.
// The mnemonic is OpcodeStr followed by _.Suffix. All records get
// hasSideEffects = 0, Uses = [MXCSR] and mayRaiseFPException = 1.
// IsCommutable and IsKCommutable are forwarded to AVX512_maskable for the rr
// record only; IsKCommutable defaults to IsCommutable.
// String pasting uses '#', matching the rest of this file.
5465multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5466                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5467                            bit IsCommutable,
5468                            bit IsKCommutable = IsCommutable> {
5469  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5470      Uses = [MXCSR], mayRaiseFPException = 1 in {
5471  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5472                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5473                  "$src2, $src1", "$src1, $src2",
5474                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5475                  IsKCommutable, IsKCommutable>,
5476                  EVEX_4V, Sched<[sched]>;
      // Memory forms are explicitly marked mayLoad.
5477  let mayLoad = 1 in {
5478    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5479                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5480                    "$src2, $src1", "$src1, $src2",
5481                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5482                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5483    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5484                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5485                     "${src2}"#_.BroadcastStr#", $src1",
5486                     "$src1, ${src2}"#_.BroadcastStr,
5487                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5488                     EVEX_4V, EVEX_B,
5489                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5490    }
5491  }
5492}
5493
// Defines rrb: the maskable packed register-register form that takes an
// explicit rounding-control operand (AVX512RC:$rc), matched through
// OpNodeRnd with the control as an i32 timm. Tagged EVEX_B and EVEX_RC.
// The mnemonic is OpcodeStr followed by _.Suffix.
// String pasting uses '#', matching the rest of this file.
5494multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5495                                  SDPatternOperator OpNodeRnd,
5496                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5497  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5498  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5499                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
5500                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5501                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5502                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5503}
5504
// Defines rrb: the maskable packed register-register "{sae}" form, matched
// through OpNodeSAE and tagged EVEX_B. The mnemonic is OpcodeStr followed by
// _.Suffix.
// String pasting uses '#', matching the rest of this file.
5505multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5506                                SDPatternOperator OpNodeSAE,
5507                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5508  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5509  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5510                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5511                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5512                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5513                  EVEX_4V, EVEX_B, Sched<[sched]>;
5514}
5515
// Instantiates avx512_fp_packed for every packed FP type of a binary op:
//   PSZ / PDZ                        - 512-bit, predicated on [prd].
//   PSZ128/PSZ256/PDZ128/PDZ256      - predicated on [prd, HasVLX].
// PD records additionally carry VEX_W and use a 64-bit EVEX_CD8.
// IsPD128Commutable (default: IsCommutable) only affects PDZ128, where it is
// passed as avx512_fp_packed's IsCommutable while IsCommutable is passed as
// its IsKCommutable.
5516multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5517                             Predicate prd, X86SchedWriteSizes sched,
5518                             bit IsCommutable = 0,
5519                             bit IsPD128Commutable = IsCommutable> {
5520  let Predicates = [prd] in {
5521  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5522                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5523                              EVEX_CD8<32, CD8VF>;
5524  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5525                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5526                              EVEX_CD8<64, CD8VF>;
5527  }
5528
5529    // Define only if AVX512VL feature is present.
5530  let Predicates = [prd, HasVLX] in {
5531    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5532                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5533                                   EVEX_CD8<32, CD8VF>;
5534    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5535                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5536                                   EVEX_CD8<32, CD8VF>;
        // Only this instantiation passes two commutability arguments.
5537    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5538                                   sched.PD.XMM, IsPD128Commutable,
5539                                   IsCommutable>, EVEX_V128, PD, VEX_W,
5540                                   EVEX_CD8<64, CD8VF>;
5541    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5542                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5543                                   EVEX_CD8<64, CD8VF>;
5544  }
5545}
5546
// Instantiates the rounding-control packed form (avx512_fp_round_packed) for
// the 512-bit single (PSZ, v16f32_info) and double (PDZ, v8f64_info) types.
// No 128-bit or 256-bit records are defined here.
5547let Uses = [MXCSR] in
5548multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5549                                   X86SchedWriteSizes sched> {
5550  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5551                                    v16f32_info>,
5552                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5553  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5554                                    v8f64_info>,
5555                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5556}
5557
// Instantiates the {sae} packed form (avx512_fp_sae_packed) for the 512-bit
// single (PSZ, v16f32_info) and double (PDZ, v8f64_info) types.
// The node parameter is named OpNodeRnd but is passed through as
// avx512_fp_sae_packed's OpNodeSAE.
5558let Uses = [MXCSR] in
5559multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5560                                 X86SchedWriteSizes sched> {
5561  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5562                                  v16f32_info>,
5563                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5564  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5565                                  v8f64_info>,
5566                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5567}
5568
// Packed FP arithmetic instantiations. add/mul/sub/div also get the
// rounding-control 512-bit forms; min/max get the {sae} 512-bit forms.
// add and mul pass IsCommutable = 1; sub and div rely on the default of 0.
5569defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512,
5570                              SchedWriteFAddSizes, 1>,
5571            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5572defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512,
5573                              SchedWriteFMulSizes, 1>,
5574            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5575defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512,
5576                              SchedWriteFAddSizes>,
5577            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5578defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512,
5579                              SchedWriteFDivSizes>,
5580            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5581defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5582                              SchedWriteFCmpSizes, 0>,
5583            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5584defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5585                              SchedWriteFCmpSizes, 0>,
5586            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max variants (X86fminc / X86fmaxc): same mnemonics and
// opcodes as VMIN / VMAX, marked isCodeGenOnly, with no {sae} forms.
5587let isCodeGenOnly = 1 in {
5588  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5589                                 SchedWriteFCmpSizes, 1>;
5590  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5591                                 SchedWriteFCmpSizes, 1>;
5592}
// FP-domain logic ops, predicated on HasDQI. The enclosing let sets Uses to
// an empty list and mayRaiseFPException to 0 for these records. null_frag is
// passed as the node (presumably so that no selection pattern is attached
// here - confirm against AVX512_maskable). VANDN is the only one not marked
// commutable.
5593let Uses = []<Register>, mayRaiseFPException = 0 in {
5594defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5595                               SchedWriteFLogicSizes, 1>;
5596defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5597                               SchedWriteFLogicSizes, 0>;
5598defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5599                               SchedWriteFLogicSizes, 1>;
5600defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5601                               SchedWriteFLogicSizes, 1>;
5602}
5603
// Defines the maskable packed records of a scalef-style binary op for one
// vector type:
//   rr  - register-register.
//   rm  - register-memory, second operand loaded with _.LdFrag.
//   rmb - register-broadcast, second operand loaded with _.BroadcastLdFrag;
//         tagged EVEX_B and printed with _.BroadcastStr.
// The mnemonic is OpcodeStr followed by _.Suffix. All records get
// Uses = [MXCSR] and mayRaiseFPException = 1.
// String pasting uses '#', matching the rest of this file.
5604multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5605                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5606  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5607  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5608                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5609                  "$src2, $src1", "$src1, $src2",
5610                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5611                  EVEX_4V, Sched<[sched]>;
5612  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5613                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5614                  "$src2, $src1", "$src1, $src2",
5615                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5616                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5617  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5618                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5619                   "${src2}"#_.BroadcastStr#", $src1",
5620                   "$src1, ${src2}"#_.BroadcastStr,
5621                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5622                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5623  }
5624}
5625
// Defines the maskable scalar records of a scalef-style binary op:
//   rr - register-register on _.RC.
//   rm - register-memory, second operand matched with _.ScalarIntMemCPat.
// The mnemonic is OpcodeStr followed by _.Suffix. Both records get
// Uses = [MXCSR] and mayRaiseFPException = 1.
// String pasting uses '#', matching the rest of this file.
5626multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5627                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5628  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5629  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5630                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5631                  "$src2, $src1", "$src1, $src2",
5632                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5633                  Sched<[sched]>;
5634  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5635                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5636                  "$src2, $src1", "$src1, $src2",
5637                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
5638                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5639  }
5640}
5641
// Builds every scalef record:
//   PSZ / PDZ       - 512-bit packed forms (opc) plus their rounding-control
//                     form from avx512_fp_round_packed.
//   SSZ / SDZ       - scalar forms (opcScaler) plus their rounding-control
//                     form from avx512_fp_scalar_round.
//   PSZ128 .. PDZ256 - 128/256-bit packed forms, predicated on HasVLX; these
//                     have no rounding-control form.
// Double-precision records additionally carry VEX_W.
// Note the scalar round form is given OpcodeStr with an explicit "ss"/"sd"
// suffix, whereas avx512_fp_scalef_scalar appends _.Suffix itself.
// String pasting uses '#', matching the rest of this file.
5642multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5643                                X86SchedWriteWidths sched> {
5644  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5645             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5646                              EVEX_V512, EVEX_CD8<32, CD8VF>;
5647  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5648             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5649                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5650  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5651             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5652                                    X86scalefsRnd, sched.Scl>,
5653                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5654  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5655             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5656                                    X86scalefsRnd, sched.Scl>,
5657                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5658
5659  // Define only if AVX512VL feature is present.
5660  let Predicates = [HasVLX] in {
5661    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5662                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
5663    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5664                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
5665    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5666                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5667    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5668                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5669  }
5670}
// VSCALEF: packed forms use opcode 0x2C, scalar forms use 0x2D. All records
// are tagged T8PD and NotEVEX2VEXConvertible.
5671defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5672                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5673
5674//===----------------------------------------------------------------------===//
5675// AVX-512  VPTESTM instructions
5676//===----------------------------------------------------------------------===//
5677
// Defines the register-register (rr) and register-memory (rm) mask-producing
// test records for one vector type; the result is written to _.KRC.
// Both records are given null_frag patterns (see the NOTE below).
// NOTE(review): the Name parameter is not referenced in this body.
5678multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5679                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
5680                         string Name> {
5681  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5682  // There are just too many permutations due to commutability and bitcasts.
5683  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
      // The trailing 1 is an extra AVX512_maskable_cmp argument that rm does
      // not pass (presumably the commutable flag - confirm).
5684  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5685                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5686                      "$src2, $src1", "$src1, $src2",
5687                   (null_frag), (null_frag), 1>,
5688                   EVEX_4V, Sched<[sched]>;
      // mayLoad is set explicitly for the memory form.
5689  let mayLoad = 1 in
5690  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5691                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5692                       "$src2, $src1", "$src1, $src2",
5693                   (null_frag), (null_frag)>,
5694                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5695                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5696  }
5697}
5698
// Defines rmb: the register-broadcast form of the mask-producing test for
// one vector type. The second operand is a _.ScalarMemOp printed with
// _.BroadcastStr; the record is tagged EVEX_B and marked mayLoad, and is
// given null_frag patterns.
// String pasting uses '#', matching the rest of this file.
5699multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5700                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5701  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5702  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5703                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5704                    "${src2}"#_.BroadcastStr#", $src1",
5705                    "$src1, ${src2}"#_.BroadcastStr,
5706                    (null_frag), (null_frag)>,
5707                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5708                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5709}
5710
// Instantiates the rr/rm (avx512_vptest) and rmb (avx512_vptest_mb) test
// records for the three vector widths of one element type:
//   Z          - 512-bit, predicated on HasAVX512.
//   Z256, Z128 - predicated on HasAVX512 and HasVLX.
5711multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5712                                  X86SchedWriteWidths sched,
5713                                  AVX512VLVectorVTInfo _> {
5714  let Predicates  = [HasAVX512] in
5715  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5716           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5717
5718  let Predicates = [HasAVX512, HasVLX] in {
5719  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5720              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5721  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5722              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5723  }
5724}
5725
// Instantiates the test records for 32-bit ("d", avx512vl_i32_info) and
// 64-bit ("q", avx512vl_i64_info) elements; the Q records also carry VEX_W.
5726multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5727                            X86SchedWriteWidths sched> {
5728  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5729                                 avx512vl_i32_info>;
5730  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5731                                 avx512vl_i64_info>, VEX_W;
5732}
5733
// Instantiates the rr/rm test records for 16-bit ("w") and 8-bit ("b")
// elements. Only avx512_vptest is used, so no rmb (broadcast) record is
// defined for these element sizes. The W records carry VEX_W.
//   WZ, BZ                      - 512-bit, predicated on HasBWI.
//   WZ256, WZ128, BZ256, BZ128  - predicated on HasVLX and HasBWI.
5734multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5735                            X86SchedWriteWidths sched> {
5736  let Predicates = [HasBWI] in {
5737  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5738                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5739  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5740                            v64i8_info, NAME#"B">, EVEX_V512;
5741  }
5742  let Predicates = [HasVLX, HasBWI] in {
5743
5744  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5745                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5746  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5747                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5748  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5749                            v32i8x_info, NAME#"B">, EVEX_V256;
5750  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5751                            v16i8x_info, NAME#"B">, EVEX_V128;
5752  }
5753}
5754
// Convenience wrapper: byte/word forms use opc_wb, dword/qword forms use
// opc_dq.  Defines nothing of its own beyond what the two parents define.
5755multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5756                                   X86SchedWriteWidths sched> :
5757  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5758  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5759
// VPTESTM and VPTESTNM use the same opcode pair (0x26 for b/w, 0x27 for d/q)
// and the same scheduling class; they differ only in the mnemonic and in the
// prefix class attached (T8PD vs. T8XS).
5760defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5761                                         SchedWriteVecLogic>, T8PD;
5762defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5763                                         SchedWriteVecLogic>, T8XS;
5764
5765//===----------------------------------------------------------------------===//
5766// AVX-512  Shift instructions
5767//===----------------------------------------------------------------------===//
5768
// Shift-by-immediate (or any "vector op imm8") instruction pair.
//   opc       - opcode byte.
//   ImmFormR  - instruction format for the register-source form.
//   ImmFormM  - instruction format for the memory-source form.
//   OpcodeStr - mnemonic.
//   OpNode    - DAG node matched as (OpNode vec, (i8 timm)).
//   sched     - scheduling info.
//   _         - vector type info.
// Sub-records:
//   ri - register source + u8imm.
//   mi - full-vector memory source (loaded via _.LdFrag) + u8imm.  Scheduled
//        with sched.Folded only; this form has no register source operand.
5769multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5770                            string OpcodeStr, SDNode OpNode,
5771                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5772  let ExeDomain = _.ExeDomain in {
5773  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5774                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5775                      "$src2, $src1", "$src1, $src2",
5776                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5777                   Sched<[sched]>;
5778  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5779                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5780                       "$src2, $src1", "$src1, $src2",
5781                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5782                          (i8 timm:$src2)))>,
5783                   Sched<[sched.Folded]>;
5784  }
5785}
5786
// Broadcast-memory companion of avx512_shift_rmi.
// Defines "mbi": the vector source is a scalar memory operand matched through
// _.BroadcastLdFrag and printed with _.BroadcastStr appended; the second
// operand is a u8imm.  The record is tagged EVEX_B.
5787multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5788                             string OpcodeStr, SDNode OpNode,
5789                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5790  let ExeDomain = _.ExeDomain in
5791  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5792                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5793      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5794     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5795     EVEX_B, Sched<[sched.Folded]>;
5796}
5797
// Shift where the shift amount comes from a vector register or memory.
//   SrcVT - value type given to the shift-amount operand in the pattern.
//   _     - type info of the vector being shifted (and of the result).
// Sub-records:
//   rr - amount in a VR128X register.
//   rm - amount loaded from an i128mem operand.
// The amount operand is VR128X / i128mem regardless of the width of `_`.
5798multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5799                            X86FoldableSchedWrite sched, ValueType SrcVT,
5800                            X86VectorVTInfo _> {
5801   // src2 is always 128-bit
5802  let ExeDomain = _.ExeDomain in {
5803  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5804                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5805                      "$src2, $src1", "$src1, $src2",
5806                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5807                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
5808  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5809                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5810                       "$src2, $src1", "$src1, $src2",
5811                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5812                   AVX512BIBase,
5813                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5814  }
5815}
5816
// Instantiates avx512_shift_rrm at 512/256/128 bits.
//   prd - base predicate; the 256/128-bit forms additionally need HasVLX.
// Note that the EVEX_CD8 tuple kind differs per width (CD8VQ / CD8VH / CD8VF)
// while the element size comes from the respective info record.
// NOTE(review): presumably this keeps the compressed-displacement scale at
// 16 bytes for every width, since the memory operand in avx512_shift_rrm is
// always i128mem - confirm against the EVEX_CD8 definition.
5817multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5818                              X86SchedWriteWidths sched, ValueType SrcVT,
5819                              AVX512VLVectorVTInfo VTInfo,
5820                              Predicate prd> {
5821  let Predicates = [prd] in
5822  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5823                               VTInfo.info512>, EVEX_V512,
5824                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5825  let Predicates = [prd, HasVLX] in {
5826  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5827                               VTInfo.info256>, EVEX_V256,
5828                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5829  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5830                               VTInfo.info128>, EVEX_V128,
5831                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5832  }
5833}
5834
// Dword / qword / word variants of the register-amount shifts.
//   opcd, opcq, opcw          - opcode byte for the D, Q and W variant.
//   NotEVEX2VEXConvertibleQ   - value assigned to notEVEX2VEXConvertible for
//                               the Q variant only (defaults to 0).
// D and Q are predicated on HasAVX512, W on HasBWI.  The shift-amount type
// passed down is v4i32 / v2i64 / v8i16 respectively.  Q is tagged VEX_W.
5835multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5836                              string OpcodeStr, SDNode OpNode,
5837                              X86SchedWriteWidths sched,
5838                              bit NotEVEX2VEXConvertibleQ = 0> {
5839  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5840                              avx512vl_i32_info, HasAVX512>;
5841  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5842  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5843                              avx512vl_i64_info, HasAVX512>, VEX_W;
5844  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5845                              avx512vl_i16_info, HasBWI>;
5846}
5847
// Instantiates the immediate forms (avx512_shift_rmi: ri/mi) together with the
// broadcast form (avx512_shift_rmbi: mbi) at 512/256/128 bits.
// Z requires HasAVX512; Z256 and Z128 require HasAVX512 + HasVLX.
// No EVEX_CD8 is attached here; users of this multiclass in this file add it
// at the instantiation site.
5848multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5849                                  string OpcodeStr, SDNode OpNode,
5850                                  X86SchedWriteWidths sched,
5851                                  AVX512VLVectorVTInfo VTInfo> {
5852  let Predicates = [HasAVX512] in
5853  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5854                              sched.ZMM, VTInfo.info512>,
5855             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5856                               VTInfo.info512>, EVEX_V512;
5857  let Predicates = [HasAVX512, HasVLX] in {
5858  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5859                              sched.YMM, VTInfo.info256>,
5860             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5861                               VTInfo.info256>, EVEX_V256;
5862  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5863                              sched.XMM, VTInfo.info128>,
5864             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5865                               VTInfo.info128>, EVEX_V128;
5866  }
5867}
5868
// Word-element immediate forms.  Only avx512_shift_rmi (ri/mi) is used; no
// broadcast (mbi) form is defined for word elements.
// WZ requires HasBWI; WZ256/WZ128 require HasVLX + HasBWI.  All are VEX_WIG.
// OpcodeStr is used verbatim (callers pass the full mnemonic including "w").
5869multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5870                              string OpcodeStr, SDNode OpNode,
5871                              X86SchedWriteWidths sched> {
5872  let Predicates = [HasBWI] in
5873  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5874                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5875  let Predicates = [HasVLX, HasBWI] in {
5876  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5877                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5878  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5879                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5880  }
5881}
5882
// Dword and qword immediate forms.
//   opcd / opcq              - opcode byte for the D / Q variant.
//   NotEVEX2VEXConvertibleQ  - value assigned to notEVEX2VEXConvertible for
//                              the Q variant only (defaults to 0).
// D is tagged EVEX_CD8<32, CD8VF>; Q is tagged EVEX_CD8<64, CD8VF> and VEX_W.
5883multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5884                               Format ImmFormR, Format ImmFormM,
5885                               string OpcodeStr, SDNode OpNode,
5886                               X86SchedWriteWidths sched,
5887                               bit NotEVEX2VEXConvertibleQ = 0> {
5888  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5889                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5890  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5891  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5892                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5893}
5894
// Immediate-count shifts and rotates.  The operation is selected by the
// ModRM format pair passed in (MRM2 = srl, MRM6 = sll, MRM4 = sra,
// MRM0 = ror, MRM1 = rol); the word forms all use opcode 0x71.
// VPSRA, VPROR and VPROL pass 0x72 for both the D and the Q opcode; the Q
// variant is still distinct because avx512_shift_rmi_dq tags it VEX_W.
// VPSRA passes 1 for NotEVEX2VEXConvertibleQ.
5895defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5896                                 SchedWriteVecShiftImm>,
5897             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5898                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5899
5900defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5901                                 SchedWriteVecShiftImm>,
5902             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5903                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5904
5905defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5906                                 SchedWriteVecShiftImm, 1>,
5907             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5908                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5909
// Rotates have no word form, so only avx512_shift_rmi_dq is used.
5910defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5911                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5912defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5913                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5914
// Shifts whose count comes from a 128-bit register/memory operand.  These
// reuse the VPSLL/VPSRA/VPSRL prefixes; the sub-record suffixes (rr/rm) differ
// from the immediate forms above (ri/mi/mbi).
// VPSRA again passes 1 for NotEVEX2VEXConvertibleQ.
5915defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5916                                SchedWriteVecShift>;
5917defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5918                                SchedWriteVecShift, 1>;
5919defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5920                                SchedWriteVecShift>;
5921
5922// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the narrow source into the low subregister of an
// otherwise IMPLICIT_DEF 512-bit register, runs the 512-bit instruction, and
// extracts the same subregister from the result.  The shift amount (a 128-bit
// register or an immediate) is passed through unchanged.
5923let Predicates = [HasAVX512, NoVLX] in {
  // v4i64 >> xmm count.
5924  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5925            (EXTRACT_SUBREG (v8i64
5926              (VPSRAQZrr
5927                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5928                 VR128X:$src2)), sub_ymm)>;
5929
  // v2i64 >> xmm count.
5930  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5931            (EXTRACT_SUBREG (v8i64
5932              (VPSRAQZrr
5933                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5934                 VR128X:$src2)), sub_xmm)>;
5935
  // v4i64 >> immediate.
5936  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5937            (EXTRACT_SUBREG (v8i64
5938              (VPSRAQZri
5939                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5940                 timm:$src2)), sub_ymm)>;
5941
  // v2i64 >> immediate.
5942  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5943            (EXTRACT_SUBREG (v8i64
5944              (VPSRAQZri
5945                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5946                 timm:$src2)), sub_xmm)>;
5947}
5948
5949//===-------------------------------------------------------------------===//
5950// Variable Bit Shifts
5951//===-------------------------------------------------------------------===//
5952
// Generic two-source vector operation where both sources have type _.VT.
// Despite the name it is also reused in this file for permutes and pshufb.
//   OpNode - DAG node matched as (OpNode src1, src2).
// Sub-records:
//   rr - both sources in registers.
//   rm - second source is a full-vector memory operand loaded via _.LdFrag.
5953multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5954                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5955  let ExeDomain = _.ExeDomain in {
5956  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5957                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5958                      "$src2, $src1", "$src1, $src2",
5959                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5960                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
5961  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5962                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5963                       "$src2, $src1", "$src1, $src2",
5964                   (_.VT (OpNode _.RC:$src1,
5965                   (_.VT (_.LdFrag addr:$src2))))>,
5966                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5967                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5968  }
5969}
5970
// Broadcast-memory companion of avx512_var_shift.
// Defines "rmb": the second source is a scalar memory operand matched through
// _.BroadcastLdFrag and printed with _.BroadcastStr appended.  Tagged EVEX_B.
5971multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5972                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5973  let ExeDomain = _.ExeDomain in
5974  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5975                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5976                    "${src2}"##_.BroadcastStr##", $src1",
5977                    "$src1, ${src2}"##_.BroadcastStr,
5978                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5979                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5980                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5981}
5982
// Instantiates avx512_var_shift (rr/rm) and avx512_var_shift_mb (rmb) at
// 512/256/128 bits.  Z requires HasAVX512; Z256/Z128 also require HasVLX.
5983multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5984                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5985  let Predicates  = [HasAVX512] in
5986  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5987           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5988
5989  let Predicates = [HasAVX512, HasVLX] in {
5990  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5991              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5992  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5993              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5994  }
5995}
5996
// Dword and qword variants of a variable shift/rotate.  The same opcode is
// used for both; the Q variant is tagged VEX_W and gets a "q" mnemonic suffix.
5997multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5998                                  SDNode OpNode, X86SchedWriteWidths sched> {
5999  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6000                                 avx512vl_i32_info>;
6001  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6002                                 avx512vl_i64_info>, VEX_W;
6003}
6004
6005// Use 512bit version to implement 128/256 bit in case NoVLX.
//   _         - type-info triple; info256/info128 give the matched types,
//               info512 the type of the widened operands.
//   OpcodeStr - record-name prefix; the instruction used is OpcodeStr#"Zrr".
//   OpNode    - DAG node to match.
//   p         - predicate list applied to both patterns.
// Both sources are inserted into the low subregister of an IMPLICIT_DEF
// 512-bit value, and the same subregister is extracted from the result.
6006multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6007                                     SDNode OpNode, list<Predicate> p> {
6008  let Predicates = p in {
  // 256-bit operands.
6009  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6010                                  (_.info256.VT _.info256.RC:$src2))),
6011            (EXTRACT_SUBREG
6012                (!cast<Instruction>(OpcodeStr#"Zrr")
6013                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6014                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6015             sub_ymm)>;
6016
  // 128-bit operands.
6017  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6018                                  (_.info128.VT _.info128.RC:$src2))),
6019            (EXTRACT_SUBREG
6020                (!cast<Instruction>(OpcodeStr#"Zrr")
6021                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6022                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6023             sub_xmm)>;
6024  }
6025}
// Word-element variable shifts.  Only avx512_var_shift (rr/rm) is used; no
// broadcast form is defined.  WZ requires HasBWI; WZ256/WZ128 require
// HasVLX + HasBWI.  All are tagged VEX_W.
// OpcodeStr is used verbatim (callers pass the full mnemonic including "w").
6026multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6027                              SDNode OpNode, X86SchedWriteWidths sched> {
6028  let Predicates = [HasBWI] in
6029  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6030              EVEX_V512, VEX_W;
6031  let Predicates = [HasVLX, HasBWI] in {
6032
6033  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6034              EVEX_V256, VEX_W;
6035  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6036              EVEX_V128, VEX_W;
6037  }
6038}
6039
// Per-element variable shifts: D/Q forms share one opcode, the W form has its
// own opcode.
6040defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6041              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6042
6043defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6044              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6045
6046defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6047              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6048
// Per-element variable rotates match the generic rotr/rotl nodes; D/Q only.
6049defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6050defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6051
// Narrow-vector fallbacks through the 512-bit instruction when VLX is absent:
// qword arithmetic right shift, and all three word shifts.
6052defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6053defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6054defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6055defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6056
6057
6058// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Every pattern widens its vector operand(s) into the low subregister of an
// IMPLICIT_DEF 512-bit value, issues the 512-bit instruction, and extracts the
// same subregister from the result.
6059let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-left, qword elements (128- then 256-bit).
6060  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6061            (EXTRACT_SUBREG (v8i64
6062              (VPROLVQZrr
6063                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6064                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6065                       sub_xmm)>;
6066  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6067            (EXTRACT_SUBREG (v8i64
6068              (VPROLVQZrr
6069                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6070                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6071                       sub_ymm)>;
6072
  // Variable rotate-left, dword elements (128- then 256-bit).
6073  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6074            (EXTRACT_SUBREG (v16i32
6075              (VPROLVDZrr
6076                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6077                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6078                        sub_xmm)>;
6079  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6080            (EXTRACT_SUBREG (v16i32
6081              (VPROLVDZrr
6082                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6083                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6084                        sub_ymm)>;
6085
  // Immediate rotate-left, qword elements (128- then 256-bit).
6086  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6087            (EXTRACT_SUBREG (v8i64
6088              (VPROLQZri
6089                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6090                        timm:$src2)), sub_xmm)>;
6091  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6092            (EXTRACT_SUBREG (v8i64
6093              (VPROLQZri
6094                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6095                       timm:$src2)), sub_ymm)>;
6096
  // Immediate rotate-left, dword elements (128- then 256-bit).
6097  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6098            (EXTRACT_SUBREG (v16i32
6099              (VPROLDZri
6100                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6101                        timm:$src2)), sub_xmm)>;
6102  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6103            (EXTRACT_SUBREG (v16i32
6104              (VPROLDZri
6105                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6106                        timm:$src2)), sub_ymm)>;
6107}
6108
6109// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Every pattern widens its vector operand(s) into the low subregister of an
// IMPLICIT_DEF 512-bit value, issues the 512-bit instruction, and extracts the
// same subregister from the result.
6110let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-right, qword elements (128- then 256-bit).
6111  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6112            (EXTRACT_SUBREG (v8i64
6113              (VPRORVQZrr
6114                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6115                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6116                       sub_xmm)>;
6117  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6118            (EXTRACT_SUBREG (v8i64
6119              (VPRORVQZrr
6120                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6121                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6122                       sub_ymm)>;
6123
  // Variable rotate-right, dword elements (128- then 256-bit).
6124  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6125            (EXTRACT_SUBREG (v16i32
6126              (VPRORVDZrr
6127                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6128                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6129                        sub_xmm)>;
6130  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6131            (EXTRACT_SUBREG (v16i32
6132              (VPRORVDZrr
6133                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6134                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6135                        sub_ymm)>;
6136
  // Immediate rotate-right, qword elements (128- then 256-bit).
6137  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6138            (EXTRACT_SUBREG (v8i64
6139              (VPRORQZri
6140                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6141                        timm:$src2)), sub_xmm)>;
6142  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6143            (EXTRACT_SUBREG (v8i64
6144              (VPRORQZri
6145                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6146                       timm:$src2)), sub_ymm)>;
6147
  // Immediate rotate-right, dword elements (128- then 256-bit).
6148  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6149            (EXTRACT_SUBREG (v16i32
6150              (VPRORDZri
6151                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6152                        timm:$src2)), sub_xmm)>;
6153  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6154            (EXTRACT_SUBREG (v16i32
6155              (VPRORDZri
6156                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6157                        timm:$src2)), sub_ymm)>;
6158}
6159
6160//===-------------------------------------------------------------------===//
6161// 1-src variable permutation VPERMW/D/Q
6162//===-------------------------------------------------------------------===//
6163
// Variable permute for dword/qword-sized elements, built from the generic
// two-source multiclasses avx512_var_shift (rr/rm) and avx512_var_shift_mb
// (rmb).  Only 512-bit (Z) and 256-bit (Z256) forms are defined here; there
// is no Z128.  A single scheduling class `sched` is used for both widths.
6164multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6165                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6166  let Predicates  = [HasAVX512] in
6167  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6168           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6169
6170  let Predicates = [HasAVX512, HasVLX] in
6171  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6172              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6173}
6174
// Immediate-controlled permute, built from avx512_shift_rmi (ri/mi) and
// avx512_shift_rmbi (mbi).  Only 512-bit (Z) and 256-bit (Z256) forms are
// defined here; there is no Z128.  A single scheduling class `sched` is used
// for both widths.
6175multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6176                                 string OpcodeStr, SDNode OpNode,
6177                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6178  let Predicates = [HasAVX512] in
6179  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6180                              sched, VTInfo.info512>,
6181             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6182                               sched, VTInfo.info512>, EVEX_V512;
6183  let Predicates = [HasAVX512, HasVLX] in
6184  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6185                              sched, VTInfo.info256>,
6186             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6187                               sched, VTInfo.info256>, EVEX_V256;
6188}
6189
// Variable permute for byte/word elements.
//   prd - feature predicate; the 256/128-bit forms additionally need HasVLX.
// Uses only avx512_var_shift (rr/rm), so no broadcast form is defined, and -
// unlike avx512_vperm_dq_sizes - a 128-bit form (Z128) is provided.
6190multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6191                              Predicate prd, SDNode OpNode,
6192                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6193  let Predicates = [prd] in
6194  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6195              EVEX_V512 ;
6196  let Predicates = [HasVLX, prd] in {
6197  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6198              EVEX_V256 ;
6199  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6200              EVEX_V128 ;
6201  }
6202}
6203
// Byte/word variable permutes.  Both use opcode 0x8D; VPERMW is tagged VEX_W
// and gated on HasBWI, VPERMB is gated on HasVBMI.
6204defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6205                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6206defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6207                               WriteVarShuffle256, avx512vl_i8_info>;
6208
// Dword/qword and float/double variable permutes.  The integer pair shares
// opcode 0x36 and the FP pair shares 0x16; the 64-bit element variants are
// tagged VEX_W.
6209defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6210                                    WriteVarShuffle256, avx512vl_i32_info>;
6211defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6212                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6213defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6214                                     WriteFVarShuffle256, avx512vl_f32_info>;
6215defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6216                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6217
// Immediate-controlled VPERMQ/VPERMPD.  These reuse the VPERMQ/VPERMPD
// prefixes from above; the sub-record suffixes differ (ri/mi/mbi here versus
// rr/rm/rmb for the variable forms).
6218defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6219                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6220                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6221defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6222                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6223                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6224
6225//===----------------------------------------------------------------------===//
6226// AVX-512 - VPERMIL
6227//===----------------------------------------------------------------------===//
6228
// Variable (vector-controlled) VPERMIL forms.
//   OpcVar - opcode byte of the variable form.
//   _      - type info of the data vector and the result.
//   Ctrl   - type info of the control vector (register class, VT, load and
//            broadcast-load fragments are taken from here).
// Sub-records:
//   rr  - control vector in a register.
//   rm  - control vector loaded from a full-vector memory operand.
//   rmb - control vector broadcast from a scalar memory operand (EVEX_B).
// Note that the rmb memory operand type, BroadcastStr and the EVEX_CD8
// element size come from `_`, while the load fragments come from `Ctrl`.
// No ExeDomain is set here; the instantiation sites in this file wrap the
// defm in `let ExeDomain = ...`.
6229multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6230                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6231                             X86VectorVTInfo Ctrl> {
6232  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6233                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6234                  "$src2, $src1", "$src1, $src2",
6235                  (_.VT (OpNode _.RC:$src1,
6236                               (Ctrl.VT Ctrl.RC:$src2)))>,
6237                  T8PD, EVEX_4V, Sched<[sched]>;
6238  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6239                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6240                  "$src2, $src1", "$src1, $src2",
6241                  (_.VT (OpNode
6242                           _.RC:$src1,
6243                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6244                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6245                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6246  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6247                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6248                   "${src2}"##_.BroadcastStr##", $src1",
6249                   "$src1, ${src2}"##_.BroadcastStr,
6250                   (_.VT (OpNode
6251                            _.RC:$src1,
6252                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6253                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6254                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6255}
6256
// Instantiates the variable VPERMIL forms at all three widths, always with
// the X86VPermilpv node.  Z requires HasAVX512; Z128/Z256 also require HasVLX.
//   _    - data vector type-info triple.
//   Ctrl - control vector type-info triple.
6257multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6258                                    X86SchedWriteWidths sched,
6259                                    AVX512VLVectorVTInfo _,
6260                                    AVX512VLVectorVTInfo Ctrl> {
6261  let Predicates = [HasAVX512] in {
6262    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6263                                  _.info512, Ctrl.info512>, EVEX_V512;
6264  }
6265  let Predicates = [HasAVX512, HasVLX] in {
6266    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6267                                  _.info128, Ctrl.info128>, EVEX_V128;
6268    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6269                                  _.info256, Ctrl.info256>, EVEX_V256;
6270  }
6271}
6272
// Full VPERMIL family for one element type.
//   OpcImm - opcode of the immediate-controlled form (X86VPermilpi).
//   OpcVar - opcode of the vector-controlled form (X86VPermilpv).
// Both groups are emitted under the same NAME prefix; their sub-record
// suffixes differ (rr/rm/rmb for the variable form, ri/mi/mbi for the
// immediate form).
6273multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6274                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6275  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6276                                      _, Ctrl>;
6277  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6278                                    X86VPermilpi, SchedWriteFShuffle, _>,
6279                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6280}
6281
// VPERMILPS / VPERMILPD: FP data vectors controlled by integer vectors of the
// same element width.  The execution domain is set here by the enclosing
// `let`.  VPERMILPD is additionally tagged VEX_W1X.
6282let ExeDomain = SSEPackedSingle in
6283defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6284                               avx512vl_i32_info>;
6285let ExeDomain = SSEPackedDouble in
6286defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6287                               avx512vl_i64_info>, VEX_W1X;
6288
6289//===----------------------------------------------------------------------===//
6290// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6291//===----------------------------------------------------------------------===//
6292
// Immediate-controlled shuffles, all on opcode 0x70 and distinguished by the
// prefix base class (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base).
// VPSHUFD uses the dword multiclass (so it also gets a broadcast form); the
// two word shuffles use avx512_shift_rmi_w and are named VPSHUFH / VPSHUFL
// here because that multiclass contributes the "W" in the sub-record names.
6293defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6294                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6295                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6296defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6297                                  X86PShufhw, SchedWriteShuffle>,
6298                                  EVEX, AVX512XSIi8Base;
6299defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6300                                  X86PShuflw, SchedWriteShuffle>,
6301                                  EVEX, AVX512XDIi8Base;
6302
6303//===----------------------------------------------------------------------===//
6304// AVX-512 - VPSHUFB
6305//===----------------------------------------------------------------------===//
6306
// Byte shuffle at 512/256/128 bits, built from the generic two-source
// multiclass avx512_var_shift (rr/rm).  Z requires HasBWI; Z256/Z128 require
// HasVLX + HasBWI.
6307multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6308                               X86SchedWriteWidths sched> {
6309  let Predicates = [HasBWI] in
6310  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6311                              EVEX_V512;
6312
6313  let Predicates = [HasVLX, HasBWI] in {
6314  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6315                              EVEX_V256;
6316  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6317                              EVEX_V128;
6318  }
6319}
6320
// VPSHUFB: opcode 0x00, selected from X86pshufb, scheduled as a variable
// shuffle, and marked VEX_WIG.
defm VPSHUFB : avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                   SchedWriteVarShuffle>,
               VEX_WIG;
6323
6324//===----------------------------------------------------------------------===//
6325// Move Low to High and High to Low packed FP Instructions
6326//===----------------------------------------------------------------------===//
6327
// Register-register VMOVLHPS: all operands are VR128X; selected from a v4f32
// X86Movlhps of $src1 and $src2.
def VMOVLHPSZrr
    : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                (ins VR128X:$src1, VR128X:$src2),
                "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set VR128X:$dst,
                      (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
      Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;

// Register-register VMOVHLPS: same operand shape, selected from X86Movhlps.
// This one is additionally flagged isCommutable and NotMemoryFoldable.
let isCommutable = 1 in {
  def VMOVHLPSZrr
      : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                  (ins VR128X:$src1, VR128X:$src2),
                  "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set VR128X:$dst,
                        (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
        Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
}
6339
6340//===----------------------------------------------------------------------===//
6341// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6343//===----------------------------------------------------------------------===//
6344
// Defines the load form (rm) of a high/low 64-bit packed move.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the three-operand asm operand string is appended.
//   OpNode    - pattern operator applied to ($src1, loaded value); callers may
//               pass null_frag.
//   _         - vector type info supplying RC, VT and ExeDomain.
// The memory operand is f64mem. In the pattern it is read with loadf64, turned
// into a v2f64 with scalar_to_vector, and bitconverted to _.VT.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
    def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                    (ins _.RC:$src1, f64mem:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.RC:$dst,
                          (OpNode _.RC:$src1,
                                  (_.VT (bitconvert
                                    (v2f64 (scalar_to_vector
                                      (loadf64 addr:$src2)))))))]>,
             Sched<[SchedWriteFShuffle.XMM.Folded,
                    SchedWriteFShuffle.XMM.ReadAfterFold]>,
             EVEX_4V;
  }
}
6359
// The PS forms are given null_frag, i.e. no selection pattern: the Movlhps
// node should only be created in SSE1, and a MOVLPS pattern would be even more
// complex. The PD forms select from X86Unpckl (high) and X86Movsd (low).
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;

defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;

defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;

defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6370
// Extra selection patterns that fold a 64-bit load into VMOVHPD / VMOVLPD.
let Predicates = [HasAVX512] in {
  // VMOVHPD: X86Unpckl whose second operand is an i64 load that was
  // scalar_to_vector'ed and bitcast to v2f64 ...
  def : Pat<(v2f64 (X86Unpckl
                      VR128X:$src1,
                      (bc_v2f64
                        (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // ... or an X86vzload64.
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD: X86Movsd whose second operand is an X86vzload64.
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
6383
// Store forms of VMOVHPS/PD and VMOVLPS/PD. All four write through an f64mem
// destination from a VR128X source and share the WriteFStore scheduling class.
let SchedRW = [WriteFStore] in {
  // The PS forms carry no pattern, so mayStore / hasSideEffects are given
  // explicitly on them (and only on them).
  let hasSideEffects = 0, mayStore = 1 in {
    def VMOVHPSZ128mr
        : AVX512PSI<0x17, MRMDestMem, (outs),
                    (ins f64mem:$dst, VR128X:$src),
                    "vmovhps\t{$src, $dst|$dst, $src}",
                    []>,
          EVEX, EVEX_CD8<32, CD8VT2>;
  }

  // Pattern: store element 0 of (X86Unpckh $src, $src) as an f64.
  def VMOVHPDZ128mr
      : AVX512PDI<0x17, MRMDestMem, (outs),
                  (ins f64mem:$dst, VR128X:$src),
                  "vmovhpd\t{$src, $dst|$dst, $src}",
                  [(store (f64 (extractelt
                                 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                 (iPTR 0))),
                          addr:$dst)]>,
        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;

  let hasSideEffects = 0, mayStore = 1 in {
    def VMOVLPSZ128mr
        : AVX512PSI<0x13, MRMDestMem, (outs),
                    (ins f64mem:$dst, VR128X:$src),
                    "vmovlps\t{$src, $dst|$dst, $src}",
                    []>,
          EVEX, EVEX_CD8<32, CD8VT2>;
  }

  // Pattern: store element 0 of the v2f64 $src as an f64.
  def VMOVLPDZ128mr
      : AVX512PDI<0x13, MRMDestMem, (outs),
                  (ins f64mem:$dst, VR128X:$src),
                  "vmovlpd\t{$src, $dst|$dst, $src}",
                  [(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
                          addr:$dst)]>,
        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
6409
let Predicates = [HasAVX512] in {
  // VMOVHPD store: also match element 0 of (X86VPermilpi $src, 1) being
  // stored as an f64.
  def : Pat<(store (f64 (extractelt
                          (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                          (iPTR 0))),
                   addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
6417//===----------------------------------------------------------------------===//
6418// FMA - Fused Multiply Operations
6419//
6420
// Packed FMA, 213 form: register (r), vector-load (m) and broadcast-load (mb)
// variants, each built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic string.
//   OpNode    - selection node; its operands are ($src2, $src1, $src3).
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info (RC, VT, MemOp, ScalarMemOp, LdFrag,
//               BroadcastLdFrag, BroadcastStr, ExeDomain).
//   Suff      - not referenced inside this multiclass.
// $src1 is tied to $dst; every variant uses MXCSR and may raise an FP
// exception.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayRaiseFPException = 1, Uses = [MXCSR] in {
    // Register form.
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src2, _.RC:$src3),
                                  OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (_.VT (OpNode _.RC:$src2, _.RC:$src1,
                                                _.RC:$src3)),
                                  1, 1>,
             AVX512FMA3Base, Sched<[sched]>;

    // Full-vector load of $src3.
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src2, _.MemOp:$src3),
                                  OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (_.VT (OpNode _.RC:$src2, _.RC:$src1,
                                                (_.LdFrag addr:$src3))),
                                  1, 0>,
             AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Broadcast load of $src3; the asm operand gets _.BroadcastStr appended.
    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src2, _.ScalarMemOp:$src3),
                                   OpcodeStr,
                                   !strconcat("${src3}", _.BroadcastStr,
                                              ", $src2"),
                                   !strconcat("$src2, ${src3}",
                                              _.BroadcastStr),
                                   (OpNode _.RC:$src2, _.RC:$src1,
                                           (_.VT (_.BroadcastLdFrag
                                                    addr:$src3))),
                                   1, 0>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6447
// Packed FMA, 213 form, register variant with an explicit AVX512RC:$rc
// operand (rb). The pattern hands (i32 timm:$rc) to OpNode as a fourth
// operand after ($src2, $src1, $src3). $src1 is tied to $dst. The let-list
// names MXCSR in Uses and does not set mayRaiseFPException. Suff is not
// referenced inside this multiclass.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR] in {
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                                   OpcodeStr,
                                   "$rc, $src3, $src2", "$src2, $src3, $rc",
                                   (_.VT (OpNode _.RC:$src2, _.RC:$src1,
                                                 _.RC:$src3,
                                                 (i32 timm:$rc))),
                                   1, 1>,
              AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
  }
}
6459
// Expands the 213-form packed FMA over all three vector widths.
//   OpNode    - node used by the r/m/mb variants.
//   OpNodeRnd - node used by the rb variant (512-bit only).
//   sched     - per-width scheduling info (ZMM / YMM / XMM members).
//   _         - width-indexed type info (info512 / info256 / info128).
// Z combines the rm and round multiclasses under HasAVX512; Z256 and Z128
// take only the rm multiclass and additionally require HasVLX.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                               _.info512, Suff>,
           avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                 _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;

    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6479
// Produces the PS and PD flavours of a 213-form packed FMA. "ps" / "pd" is
// appended to the mnemonic, SchedWriteFMA supplies scheduling, and the PD
// flavour is additionally tagged VEX_W.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;

  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
            VEX_W;
}
6488
// 213-form packed FMA families: <opcode, mnemonic stem, node, rounding node>.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213",
                                       X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213",
                                       X86Fnmsub, X86FnmsubRnd>;
6495
6496
// Packed FMA, 231 form: register (r), vector-load (m) and broadcast-load (mb)
// variants, each built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic string.
//   OpNode    - selection node; its operands are ($src2, $src3, $src1).
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info (RC, VT, MemOp, ScalarMemOp, LdFrag,
//               BroadcastLdFrag, BroadcastStr, ExeDomain).
//   Suff      - not referenced inside this multiclass.
// $src1 is tied to $dst; every variant uses MXCSR and may raise an FP
// exception.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form; note the two extra trailing arguments (vselect, 1) that
  // the memory forms below do not pass.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  // Full-vector load of $src3.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast load of $src3; the asm operand gets _.BroadcastStr appended.
  // The strings are built with !strconcat, matching avx512_fma3p_213_rm,
  // rather than with the doubled "##" paste spelling.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
         !strconcat("$src2, ${src3}", _.BroadcastStr),
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (_.BroadcastLdFrag addr:$src3)),
                      _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6524
// Packed FMA, 231 form, register variant with an explicit AVX512RC:$rc
// operand (rb). The pattern hands (i32 timm:$rc) to OpNode as a fourth
// operand after ($src2, $src3, $src1). $src1 is tied to $dst. The let-list
// names MXCSR in Uses and does not set mayRaiseFPException. Suff is not
// referenced inside this multiclass.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR] in {
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                                   OpcodeStr,
                                   "$rc, $src3, $src2", "$src2, $src3, $rc",
                                   (_.VT (OpNode _.RC:$src2, _.RC:$src3,
                                                 _.RC:$src1,
                                                 (i32 timm:$rc))),
                                   1, 1, vselect, 1>,
              AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
  }
}
6537
// Expands the 231-form packed FMA over all three vector widths.
//   OpNode    - node used by the r/m/mb variants.
//   OpNodeRnd - node used by the rb variant (512-bit only).
//   sched     - per-width scheduling info (ZMM / YMM / XMM members).
//   _         - width-indexed type info (info512 / info256 / info128).
// Z combines the rm and round multiclasses under HasAVX512; Z256 and Z128
// take only the rm multiclass and additionally require HasVLX.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                               _.info512, Suff>,
           avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                 _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;

    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6557
// Produces the PS and PD flavours of a 231-form packed FMA. "ps" / "pd" is
// appended to the mnemonic, SchedWriteFMA supplies scheduling, and the PD
// flavour is additionally tagged VEX_W.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;

  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
            VEX_W;
}
6566
// 231-form packed FMA families: <opcode, mnemonic stem, node, rounding node>.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231",
                                       X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231",
                                       X86Fnmsub, X86FnmsubRnd>;
6573
// Packed FMA, 132 form: register (r), vector-load (m) and broadcast-load (mb)
// variants, each built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic string.
//   OpNode    - selection node; the register pattern passes
//               ($src1, $src3, $src2), the memory patterns pass
//               (loaded $src3, $src1, $src2).
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info (RC, VT, MemOp, ScalarMemOp, LdFrag,
//               BroadcastLdFrag, BroadcastStr, ExeDomain).
//   Suff      - not referenced inside this multiclass.
// $src1 is tied to $dst; every variant uses MXCSR and may raise an FP
// exception.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // The pattern is written in 312 order so that the load sits in a different
  // operand position than in the 213 and 231 patterns; this helps tablegen's
  // duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form, also in 312 order for the same reason. The asm strings
  // are built with !strconcat, matching avx512_fma3p_213_rm, rather than with
  // the doubled "##" paste spelling.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
         !strconcat("$src2, ${src3}", _.BroadcastStr),
         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                       _.RC:$src1, _.RC:$src2)), 1, 0>,
         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6604
// Packed FMA, 132 form, register variant with an explicit AVX512RC:$rc
// operand (rb). The pattern hands (i32 timm:$rc) to OpNode as a fourth
// operand after ($src1, $src3, $src2). $src1 is tied to $dst. The let-list
// names MXCSR in Uses and does not set mayRaiseFPException. Suff is not
// referenced inside this multiclass.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, Uses = [MXCSR] in {
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                                   OpcodeStr,
                                   "$rc, $src3, $src2", "$src2, $src3, $rc",
                                   (_.VT (OpNode _.RC:$src1, _.RC:$src3,
                                                 _.RC:$src2,
                                                 (i32 timm:$rc))),
                                   1, 1, vselect, 1>,
              AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
  }
}
6617
// Expands the 132-form packed FMA over all three vector widths.
//   OpNode    - node used by the r/m/mb variants.
//   OpNodeRnd - node used by the rb variant (512-bit only).
//   sched     - per-width scheduling info (ZMM / YMM / XMM members).
//   _         - width-indexed type info (info512 / info256 / info128).
// Z combines the rm and round multiclasses under HasAVX512; Z256 and Z128
// take only the rm multiclass and additionally require HasVLX.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                               _.info512, Suff>,
           avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                 _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;

    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6637
// Produces the PS and PD flavours of a 132-form packed FMA. "ps" / "pd" is
// appended to the mnemonic, SchedWriteFMA supplies scheduling, and the PD
// flavour is additionally tagged VEX_W.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f32_info, "PS">;

  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    SchedWriteFMA, avx512vl_f64_info, "PD">,
            VEX_W;
}
6646
// 132-form packed FMA families: <opcode, mnemonic stem, node, rounding node>.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132",
                                       X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132",
                                       X86Fnmsub, X86FnmsubRnd>;
6653
6654// Scalar FMA
// Shared body for one scalar FMA opcode.
//   opc         - opcode byte.
//   OpcodeStr   - full mnemonic string.
//   _           - scalar type info (RC, FRC, ScalarMemOp, IntScalarMemOp).
//   RHS_r/m/b   - pattern dags for the FRC-based r, m and rb defs.
//   MaskOnlyReg - when set, r and rb get an empty pattern list; m always
//                 uses RHS_m.
// Two groups are emitted, all with $src1 tied to $dst:
//   * r_Int / m_Int / rb_Int: maskable forms on _.RC, with null_frag in the
//     pattern slot.
//   * r / m / rb: isCodeGenOnly, isCommutable forms on _.FRC that carry the
//     RHS_* patterns.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
  let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
    defm r_Int : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _,
                     (outs _.RC:$dst),
                     (ins _.RC:$src2, _.RC:$src3),
                     OpcodeStr, "$src3, $src2", "$src2, $src3",
                     (null_frag), 1, 1>,
                 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

    let mayLoad = 1 in {
      defm m_Int : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src2, _.IntScalarMemOp:$src3),
                       OpcodeStr, "$src3, $src2", "$src2, $src3",
                       (null_frag), 1, 1>,
                   AVX512FMA3Base,
                   Sched<[SchedWriteFMA.Scl.Folded,
                          SchedWriteFMA.Scl.ReadAfterFold]>,
                   SIMD_EXC;
    }

    // Explicit rounding-control form: names MXCSR in Uses and is not given
    // SIMD_EXC.
    let Uses = [MXCSR] in {
      defm rb_Int : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _,
                        (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                        OpcodeStr,
                        "$rc, $src3, $src2", "$src2, $src3, $rc",
                        (null_frag), 1, 1>,
                    AVX512FMA3Base, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>;
    }

    let isCommutable = 1, isCodeGenOnly = 1 in {
      def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                  !strconcat(OpcodeStr,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  !if(MaskOnlyReg, [], [RHS_r])>,
              Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

      def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                  !strconcat(OpcodeStr,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                  [RHS_m]>,
              Sched<[SchedWriteFMA.Scl.Folded,
                     SchedWriteFMA.Scl.ReadAfterFold]>,
              SIMD_EXC;

      let Uses = [MXCSR] in {
        def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3,
                          AVX512RC:$rc),
                     !strconcat(OpcodeStr,
                                "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                     !if(MaskOnlyReg, [], [RHS_b])>,
                 EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
      }
    } // isCodeGenOnly = 1, isCommutable = 1
  } // Constraints = "$src1 = $dst"
}
6697
// Emits the 213, 231 and 132 scalar forms for one element type.
//   opc213/opc231/opc132 - opcode byte of each form.
//   OpcodeStr            - mnemonic stem; "213"/"231"/"132" and _.Suffix are
//                          appended.
//   OpNode / OpNodeRnd   - nodes used for the register/memory patterns and
//                          for the rounding-control pattern.
//   _                    - scalar type info.
//   SUFF                 - spliced into the record name (NAME#<form>#SUFF#Z).
// The last argument passed to avx512_fma3s_common is its MaskOnlyReg bit:
// 0 for the 213 form, 1 for the 231 and 132 forms.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
    // 213 form. Operands for the intrinsic are in 123 order to preserve
    // passthru semantics.
    defm NAME#213#SUFF#Z: avx512_fma3s_common<
        opc213, OpcodeStr#"213"#_.Suffix, _,
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, _.FRC:$src3))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                              (_.ScalarLdFrag addr:$src3)))),
        (set _.FRC:$dst,
             (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1, _.FRC:$src3,
                                 (i32 timm:$rc)))),
        0>;

    // 231 form.
    defm NAME#231#SUFF#Z: avx512_fma3s_common<
        opc231, OpcodeStr#"231"#_.Suffix, _,
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, _.FRC:$src1))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                              _.FRC:$src1))),
        (set _.FRC:$dst,
             (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3, _.FRC:$src1,
                                 (i32 timm:$rc)))),
        1>;

    // 132 form. The memory pattern is written in 312 order so that the load
    // sits in a different operand position than in the 213 and 231 patterns;
    // this helps tablegen's duplicate pattern detection.
    defm NAME#132#SUFF#Z: avx512_fma3s_common<
        opc132, OpcodeStr#"132"#_.Suffix, _,
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, _.FRC:$src2))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), _.FRC:$src1,
                              _.FRC:$src2))),
        (set _.FRC:$dst,
             (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3, _.FRC:$src2,
                                 (i32 timm:$rc)))),
        1>;
  }
}
6731
// Instantiates avx512_fma3s_all for the f32 ("SS") and f64 ("SD") scalar
// types under HasAVX512. Both are VEX_LIG; the SD variant also gets VEX_W and
// a 64-bit EVEX_CD8 tag instead of the 32-bit one.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr,
                                 OpNode, OpNodeRnd, f32x_info, "SS">,
                EVEX_CD8<32, CD8VT1>, VEX_LIG;

    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr,
                                 OpNode, OpNodeRnd, f64x_info, "SD">,
                EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
6743
// Scalar FMA families:
//   <opc213, opc231, opc132, mnemonic stem, node, rounding node>.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd",
                            X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub",
                            X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd",
                            X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub",
                            X86Fnmsub, X86FnmsubRnd>;
6748
6749multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6750                                      string Suffix, SDNode Move,
6751                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
6752  let Predicates = [HasAVX512] in {
6753    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6754                (Op _.FRC:$src2,
6755                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6756                    _.FRC:$src3))))),
6757              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6758               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6759               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6760
6761    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6762                (Op _.FRC:$src2, _.FRC:$src3,
6763                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6764              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6765               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6766               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6767
6768    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6769                (Op _.FRC:$src2,
6770                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6771                    (_.ScalarLdFrag addr:$src3)))))),
6772              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6773               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6774               addr:$src3)>;
6775
6776    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6777                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6778                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6779              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6780               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6781               addr:$src3)>;
6782
6783    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6784                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6785                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6786              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6787               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6788               addr:$src3)>;
6789
6790    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6791               (X86selects VK1WM:$mask,
6792                (Op _.FRC:$src2,
6793                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6794                    _.FRC:$src3),
6795                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6796              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6797               VR128X:$src1, VK1WM:$mask,
6798               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6799               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6800
6801    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6802               (X86selects VK1WM:$mask,
6803                (Op _.FRC:$src2,
6804                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6805                    (_.ScalarLdFrag addr:$src3)),
6806                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6807              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6808               VR128X:$src1, VK1WM:$mask,
6809               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6810
6811    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6812               (X86selects VK1WM:$mask,
6813                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6814                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6815                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6816              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6817               VR128X:$src1, VK1WM:$mask,
6818               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6819
6820    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821               (X86selects VK1WM:$mask,
6822                (Op _.FRC:$src2, _.FRC:$src3,
6823                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6824                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6825              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6826               VR128X:$src1, VK1WM:$mask,
6827               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6828               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6829
6830    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831               (X86selects VK1WM:$mask,
6832                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6833                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6834                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6835              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6836               VR128X:$src1, VK1WM:$mask,
6837               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6838
6839    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6840               (X86selects VK1WM:$mask,
6841                (Op _.FRC:$src2,
6842                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843                    _.FRC:$src3),
6844                (_.EltVT ZeroFP)))))),
6845              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6846               VR128X:$src1, VK1WM:$mask,
6847               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6848               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6849
6850    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851               (X86selects VK1WM:$mask,
6852                (Op _.FRC:$src2, _.FRC:$src3,
6853                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6854                (_.EltVT ZeroFP)))))),
6855              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6856               VR128X:$src1, VK1WM:$mask,
6857               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6858               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6859
6860    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6861               (X86selects VK1WM:$mask,
6862                (Op _.FRC:$src2,
6863                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864                    (_.ScalarLdFrag addr:$src3)),
6865                (_.EltVT ZeroFP)))))),
6866              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6867               VR128X:$src1, VK1WM:$mask,
6868               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6869
6870    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6871               (X86selects VK1WM:$mask,
6872                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6873                    _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6874                (_.EltVT ZeroFP)))))),
6875              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6876               VR128X:$src1, VK1WM:$mask,
6877               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6878
6879    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6880               (X86selects VK1WM:$mask,
6881                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6882                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6883                (_.EltVT ZeroFP)))))),
6884              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6885               VR128X:$src1, VK1WM:$mask,
6886               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6887
6888    // Patterns with rounding mode.
6889    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6890                (RndOp _.FRC:$src2,
6891                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6892                       _.FRC:$src3, (i32 timm:$rc)))))),
6893              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6894               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6895               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6896
6897    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6898                (RndOp _.FRC:$src2, _.FRC:$src3,
6899                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6900                       (i32 timm:$rc)))))),
6901              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6902               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6903               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6904
6905    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6906               (X86selects VK1WM:$mask,
6907                (RndOp _.FRC:$src2,
6908                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6909                       _.FRC:$src3, (i32 timm:$rc)),
6910                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6911              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6912               VR128X:$src1, VK1WM:$mask,
6913               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6914               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6915
6916    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6917               (X86selects VK1WM:$mask,
6918                (RndOp _.FRC:$src2, _.FRC:$src3,
6919                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6920                       (i32 timm:$rc)),
6921                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6922              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6923               VR128X:$src1, VK1WM:$mask,
6924               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6925               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6926
6927    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6928               (X86selects VK1WM:$mask,
6929                (RndOp _.FRC:$src2,
6930                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6931                       _.FRC:$src3, (i32 timm:$rc)),
6932                (_.EltVT ZeroFP)))))),
6933              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6934               VR128X:$src1, VK1WM:$mask,
6935               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6936               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6937
6938    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6939               (X86selects VK1WM:$mask,
6940                (RndOp _.FRC:$src2, _.FRC:$src3,
6941                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                       (i32 timm:$rc)),
6943                (_.EltVT ZeroFP)))))),
6944              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6945               VR128X:$src1, VK1WM:$mask,
6946               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6947               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6948  }
6949}
6950
// Instantiate the scalar FMA selection patterns for the four instruction
// families (VFMADD, VFMSUB, VFNMADD, VFNMSUB).  The first group uses the "SS"
// suffix with X86Movss, v4f32x_info and fp32imm0; the second group uses the
// "SD" suffix with X86Movsd, v2f64x_info and fp64imm0.
// Note that only the VFMADD instantiations pass an "any_" node (X86any_Fmadd).
6951defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
6952                                  X86Movss, v4f32x_info, fp32imm0>;
6953defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6954                                  X86Movss, v4f32x_info, fp32imm0>;
6955defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6956                                  X86Movss, v4f32x_info, fp32imm0>;
6957defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6958                                  X86Movss, v4f32x_info, fp32imm0>;
6959
6960defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
6961                                  X86Movsd, v2f64x_info, fp64imm0>;
6962defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6963                                  X86Movsd, v2f64x_info, fp64imm0>;
6964defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6965                                  X86Movsd, v2f64x_info, fp64imm0>;
6966defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6967                                  X86Movsd, v2f64x_info, fp64imm0>;
6968
6969//===----------------------------------------------------------------------===//
6970// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6971//===----------------------------------------------------------------------===//
// Defines the three operand forms of a 52-bit multiply-add instruction for one
// vector type `_`:
//   r  - both multiplicands in registers,
//   m  - second multiplicand loaded with _.LdFrag,
//   mb - second multiplicand loaded with _.BroadcastLdFrag (EVEX_B set).
// In every form the accumulator $src1 is tied to $dst by the surrounding
// Constraints and the definition is produced through AVX512_maskable_3src.
6972let Constraints = "$src1 = $dst" in {
6973multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6974                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6975  // NOTE: The SDNode has the multiply operands first with the add last.
6976  // This enables commuted load patterns to be autogenerated by tablegen.
6977  let ExeDomain = _.ExeDomain in {
      // Register-register form.
6978  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6979          (ins _.RC:$src2, _.RC:$src3),
6980          OpcodeStr, "$src3, $src2", "$src2, $src3",
6981          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6982         AVX512FMA3Base, Sched<[sched]>;
6983
      // Register-memory form: $src3 is a full-width vector load.
6984  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6985          (ins _.RC:$src2, _.MemOp:$src3),
6986          OpcodeStr, "$src3, $src2", "$src2, $src3",
6987          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6988          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6989
      // Broadcast form: $src3 is a scalar memory operand; _.BroadcastStr is
      // appended to it in both assembly syntaxes.
6990  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6991            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6992            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6993            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6994            (OpNode _.RC:$src2,
6995                    (_.VT (_.BroadcastLdFrag addr:$src3)),
6996                    _.RC:$src1)>,
6997            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6998  }
6999}
7000} // Constraints = "$src1 = $dst"
7001
// Instantiates avx512_pmadd52_rm once per vector width of the
// AVX512VLVectorVTInfo `_`, picking the matching scheduling class from `sched`:
//   Z    - info512 / sched.ZMM, requires HasIFMA,
//   Z256 - info256 / sched.YMM, requires HasVLX and HasIFMA,
//   Z128 - info128 / sched.XMM, requires HasVLX and HasIFMA.
// Each width uses a CD8VF compressed-displacement tuple sized by its EltSize.
7002multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7003                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7004  let Predicates = [HasIFMA] in {
7005    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7006                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7007  }
7008  let Predicates = [HasVLX, HasIFMA] in {
7009    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7010                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7011    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7012                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7013  }
7014}
7015
// The two IFMA instructions: opcode 0xb4 selects x86vpmadd52l ("vpmadd52luq")
// and opcode 0xb5 selects x86vpmadd52h ("vpmadd52huq").  Both operate on the
// i64 vector types, use the vector integer multiply scheduling classes and set
// VEX_W.
7016defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7017                                         SchedWriteVecIMul, avx512vl_i64_info>,
7018                                         VEX_W;
7019defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7020                                         SchedWriteVecIMul, avx512vl_i64_info>,
7021                                         VEX_W;
7022
7023//===----------------------------------------------------------------------===//
7024// AVX-512  Scalar convert from signed/unsigned integer to float/double
7025//===----------------------------------------------------------------------===//
7026
// Scalar integer-to-FP conversion forms without a rounding-control operand.
//   rr / rm         - isCodeGenOnly definitions on DstVT.FRC with an empty
//                     pattern list (selected by explicit Pat records).
//   rr_Int / rm_Int - definitions on DstVT.RC whose pattern applies OpNode to
//                     the pass-through vector $src1 and the integer source.
// Parameters:
//   SrcRC                 - register class of the integer source.
//   x86memop / ld_frag    - memory operand and load fragment of the rm forms.
//   asm / mem             - mnemonic and its operand-size suffix.
//   _Uses                 - implicit register uses, [MXCSR] by default.
//   _mayRaiseFPException  - value of mayRaiseFPException, 1 by default.
// The trailing InstAlias sits outside the `let` block and maps the
// explicitly-suffixed mnemonic ("v" # asm # mem) onto rr_Int for the "att"
// variant only.
7027multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7028                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7029                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7030                    string mem, list<Register> _Uses = [MXCSR],
7031                    bit _mayRaiseFPException = 1> {
7032let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7033    mayRaiseFPException = _mayRaiseFPException in {
7034  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7035    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7036              (ins DstVT.FRC:$src1, SrcRC:$src),
7037              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7038              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7039    let mayLoad = 1 in
7040      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7041              (ins DstVT.FRC:$src1, x86memop:$src),
7042              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7043              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7044  } // hasSideEffects = 0
7045  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7046                (ins DstVT.RC:$src1, SrcRC:$src2),
7047                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7048                [(set DstVT.RC:$dst,
7049                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7050               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7051
7052  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7053                (ins DstVT.RC:$src1, x86memop:$src2),
7054                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7055                [(set DstVT.RC:$dst,
7056                      (OpNode (DstVT.VT DstVT.RC:$src1),
7057                               (ld_frag addr:$src2)))]>,
7058                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7059}
7060  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7061                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7062                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7063}
7064
// Register-only integer-to-FP conversion form with an explicit rounding-control
// operand.  rrb_Int takes an extra AVX512RC:$rc input, matches OpNode with the
// rounding mode as an i32 target immediate, and is encoded with EVEX_B and
// EVEX_RC.  It implicitly uses MXCSR.
// As in avx512_vcvtsi, an "att"-only alias accepts the mnemonic with the
// explicit `mem` size suffix.
7065multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7066                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7067                               X86VectorVTInfo DstVT, string asm,
7068                               string mem> {
7069  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7070  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7071              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7072              !strconcat(asm,
7073                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7074              [(set DstVT.RC:$dst,
7075                    (OpNode (DstVT.VT DstVT.RC:$src1),
7076                             SrcRC:$src2,
7077                             (i32 timm:$rc)))]>,
7078              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7079  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7080                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7081                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7082}
7083
// Combines the rounding form (avx512_vcvtsi_round, using OpNodeRnd) with the
// plain forms (avx512_vcvtsi, using OpNode and its default _Uses /
// _mayRaiseFPException) under a single NAME, and marks them all VEX_LIG.
7084multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7085                                X86FoldableSchedWrite sched,
7086                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7087                                X86MemOperand x86memop, PatFrag ld_frag,
7088                                string asm, string mem> {
7089  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7090              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7091                            ld_frag, asm, mem>, VEX_LIG;
7092}
7093
// Signed (opcode 0x2A) and unsigned (opcode 0x7B) scalar integer-to-FP
// conversions, their suffix-less assembler aliases, and the selection patterns
// for the generic any_sint_to_fp / any_uint_to_fp nodes.  Everything in this
// block requires HasAVX512.
7094let Predicates = [HasAVX512] in {
    // Signed conversions.  The 32-bit-integer-to-double variant is built from
    // avx512_vcvtsi directly with null_frag, an empty Uses list and
    // mayRaiseFPException = 0, so it gets no rounding (rrb_Int) form.
    // NOTE(review): presumably because every 32-bit integer is exactly
    // representable as a double - confirm.
7095defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7096                                 WriteCvtI2SS, GR32,
7097                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7098                                 XS, EVEX_CD8<32, CD8VT1>;
7099defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7100                                 WriteCvtI2SS, GR64,
7101                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7102                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7103defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7104                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7105                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7106defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7107                                 WriteCvtI2SD, GR64,
7108                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7109                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7110
    // "att"-only aliases: the mnemonic without a size suffix and with a memory
    // operand resolves to the 32-bit (i32mem) rm_Int form.
7111def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7112              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7113def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7114              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7115
    // Select the codegen-only rm forms for conversions of a loaded integer.
    // The first (pass-through) source is an IMPLICIT_DEF.
7116def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7117          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7118def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7119          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7120def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7121          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7122def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7123          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7124
    // Same, for an integer already in a general-purpose register (rr forms).
7125def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7126          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7127def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7128          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7129def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7130          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7131def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7132          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7133
    // Unsigned conversions; the structure mirrors the signed definitions above,
    // including the special-cased 32-bit-integer-to-double variant.
7134defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7135                                  WriteCvtI2SS, GR32,
7136                                  v4f32x_info, i32mem, loadi32,
7137                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7138defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7139                                  WriteCvtI2SS, GR64,
7140                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7141                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7142defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7143                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7144                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7145defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7146                                  WriteCvtI2SD, GR64,
7147                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7148                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7149
    // "att"-only aliases for the suffix-less unsigned mnemonics (i32mem form).
7150def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7151              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7152def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7153              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7154
    // Unsigned conversions of a loaded integer (rm forms).
7155def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7156          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7157def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7158          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7159def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7160          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7161def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7162          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7163
    // Unsigned conversions of an integer in a register (rr forms).
7164def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7165          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7166def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7167          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7168def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7169          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7170def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7171          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7172}
7173
7174//===----------------------------------------------------------------------===//
7175// AVX-512  Scalar convert from float/double to integer
7176//===----------------------------------------------------------------------===//
7177
// Scalar FP-to-integer conversion forms operating on the vector register class
// of SrcVT (the "_Int" forms):
//   rr_Int  - register source, matches OpNode,
//   rrb_Int - register source plus AVX512RC:$rc, matches OpNodeRnd
//             (EVEX_B, EVEX_RC, implicit MXCSR use),
//   rm_Int  - memory source matched through SrcVT.ScalarIntMemCPat.
// The instruction definitions require HasAVX512.  The three "att"-only aliases
// accept the mnemonic with `aliasStr` appended ("v" # asm # aliasStr).
7178multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7179                                  X86VectorVTInfo DstVT, SDNode OpNode,
7180                                  SDNode OpNodeRnd,
7181                                  X86FoldableSchedWrite sched, string asm,
7182                                  string aliasStr> {
7183  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7184    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7185                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7186                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7187                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7188    let Uses = [MXCSR] in
7189    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7190                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7191                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7192                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7193                 Sched<[sched]>;
7194    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7195                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7196                [(set DstVT.RC:$dst, (OpNode
7197                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7198                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7199  } // Predicates = [HasAVX512]
7200
7201  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7202          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7203  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7204          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7205  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7206          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7207                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7208}
7209
7210// Convert float/double to signed/unsigned int 32/64
// Opcode 0x2D is used for the signed results (X86cvts2si) and 0x79 for the
// unsigned ones (X86cvts2usi).  Variants with a 64-bit integer destination
// (i64x_info) add VEX_W and use the "{q}" alias suffix; the 32-bit ones use
// "{l}".  Single-precision sources use the XS prefix, double-precision XD.
7211defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7212                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7213                                   XS, EVEX_CD8<32, CD8VT1>;
7214defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7215                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7216                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7217defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7218                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7219                                   XS, EVEX_CD8<32, CD8VT1>;
7220defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7221                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7222                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7223defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7224                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7225                                   XD, EVEX_CD8<64, CD8VT1>;
7226defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7227                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7228                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7229defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7230                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7231                                   XD, EVEX_CD8<64, CD8VT1>;
7232defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7233                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7234                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7235
7236// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7237// which produce unnecessary vmovs{s,d} instructions
// Each pattern matches an X86Movss/X86Movsd that inserts a converted scalar
// (wrapped in scalar_to_vector) into $dst and selects the corresponding _Int
// instruction with $dst as its pass-through operand.  Register (GR32/GR64) and
// load (loadi32/loadi64) sources are covered for both signed and unsigned
// conversions.
7238let Predicates = [HasAVX512] in {
    // Signed integer -> f32.
7239def : Pat<(v4f32 (X86Movss
7240                   (v4f32 VR128X:$dst),
7241                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7242          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7243
7244def : Pat<(v4f32 (X86Movss
7245                   (v4f32 VR128X:$dst),
7246                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7247          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7248
7249def : Pat<(v4f32 (X86Movss
7250                   (v4f32 VR128X:$dst),
7251                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7252          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7253
7254def : Pat<(v4f32 (X86Movss
7255                   (v4f32 VR128X:$dst),
7256                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7257          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7258
    // Signed integer -> f64.
7259def : Pat<(v2f64 (X86Movsd
7260                   (v2f64 VR128X:$dst),
7261                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7262          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7263
7264def : Pat<(v2f64 (X86Movsd
7265                   (v2f64 VR128X:$dst),
7266                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7267          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7268
7269def : Pat<(v2f64 (X86Movsd
7270                   (v2f64 VR128X:$dst),
7271                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7272          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7273
7274def : Pat<(v2f64 (X86Movsd
7275                   (v2f64 VR128X:$dst),
7276                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7277          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7278
    // Unsigned integer -> f32.
7279def : Pat<(v4f32 (X86Movss
7280                   (v4f32 VR128X:$dst),
7281                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7282          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7283
7284def : Pat<(v4f32 (X86Movss
7285                   (v4f32 VR128X:$dst),
7286                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7287          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7288
7289def : Pat<(v4f32 (X86Movss
7290                   (v4f32 VR128X:$dst),
7291                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7292          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7293
7294def : Pat<(v4f32 (X86Movss
7295                   (v4f32 VR128X:$dst),
7296                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7297          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7298
    // Unsigned integer -> f64.
7299def : Pat<(v2f64 (X86Movsd
7300                   (v2f64 VR128X:$dst),
7301                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7302          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7303
7304def : Pat<(v2f64 (X86Movsd
7305                   (v2f64 VR128X:$dst),
7306                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7307          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7308
7309def : Pat<(v2f64 (X86Movsd
7310                   (v2f64 VR128X:$dst),
7311                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7312          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7313
7314def : Pat<(v2f64 (X86Movsd
7315                   (v2f64 VR128X:$dst),
7316                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7317          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7318} // Predicates = [HasAVX512]
7319
7320// Convert float/double to signed/unsigned int 32/64 with truncation
// Forms defined (all require HasAVX512):
//   rr / rm  - isCodeGenOnly forms on the scalar FP register class
//              (_SrcRC.FRC) / a scalar load, matching OpNode,
//   rr_Int   - vector-register source, matching OpNodeInt,
//   rrb_Int  - vector-register source with {sae} in the assembly string,
//              matching OpNodeSAE (EVEX_B, implicit MXCSR use),
//   rm_Int   - memory source through _SrcRC.ScalarIntMemCPat, matching
//              OpNodeInt.
// Unlike avx512_cvt_s_int_round, the aliases do not prepend "v": `asm` is
// used as passed by the caller with `aliasStr` appended.
7321multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7322                            X86VectorVTInfo _DstRC, SDNode OpNode,
7323                            SDNode OpNodeInt, SDNode OpNodeSAE,
7324                            X86FoldableSchedWrite sched, string aliasStr>{
7325let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
7326  let isCodeGenOnly = 1 in {
7327  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7328              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7329              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7330              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7331  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7332              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7333              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7334              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7335  }
7336
7337  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7338            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7339           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7340           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7341  let Uses = [MXCSR] in
7342  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7343            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7344            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7345                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7346  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7347              (ins _SrcRC.IntScalarMemOp:$src),
7348              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7349              [(set _DstRC.RC:$dst,
7350                (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7351              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7352} //HasAVX512
7353
7354  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7355          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7356  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7357          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7358  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7359          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7360                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7361}
7362
// Truncating FP-to-integer conversions.  The first group (opcode 0x2C) produces
// signed results and matches any_fp_to_sint / X86cvtts2Int / X86cvtts2IntSAE;
// the second group (opcode 0x78) produces unsigned results and matches
// any_fp_to_uint / X86cvtts2UInt / X86cvtts2UIntSAE.  Variants with an
// i64x_info destination add VEX_W and use the "{q}" alias suffix.
7363defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7364                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7365                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7366defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7367                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7368                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7369defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7370                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7371                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7372defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7373                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7374                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7375
7376defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7377                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7378                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7379defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7380                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7381                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7382defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7383                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7384                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7385defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7386                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7387                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7388
7389//===----------------------------------------------------------------------===//
7390// AVX-512  Convert from float to double and back
7391//===----------------------------------------------------------------------===//
7392
// Scalar FP-to-FP conversion from type `_Src` to type `_`.  Every definition
// uses MXCSR and may raise an FP exception (see the enclosing `let`).
//   rr_Int / rm_Int - maskable forms (AVX512_maskable_scalar) on the vector
//                     register classes; the pattern applies OpNode to the
//                     pass-through vector $src1 and the source $src2.
//   rr / rm         - isCodeGenOnly forms on the scalar FP register classes
//                     with an empty pattern list.
7393let Uses = [MXCSR], mayRaiseFPException = 1 in
7394multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7395                                X86VectorVTInfo _Src, SDNode OpNode,
7396                                X86FoldableSchedWrite sched> {
7397  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7398                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7399                         "$src2, $src1", "$src1, $src2",
7400                         (_.VT (OpNode (_.VT _.RC:$src1),
7401                                       (_Src.VT _Src.RC:$src2)))>,
7402                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7403  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7404                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7405                         "$src2, $src1", "$src1, $src2",
7406                         (_.VT (OpNode (_.VT _.RC:$src1),
7407                                  (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7408                         EVEX_4V, VEX_LIG,
7409                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7410
7411  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7412    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7413               (ins _.FRC:$src1, _Src.FRC:$src2),
7414               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7415               EVEX_4V, VEX_LIG, Sched<[sched]>;
7416    let mayLoad = 1 in
7417    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7418               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7419               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7420               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7421  }
7422}
7423
// Scalar conversion with SAE (suppress all exceptions).
// Register-only form; the asm string carries the {sae} operand and the
// record is tagged EVEX_B.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, X86VectorVTInfo _Src,
                                    SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<
                     opc, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _Src.RC:$src2),
                     OpcodeStr,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                      (_Src.VT _Src.RC:$src2)))>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
7436
// Scalar conversion with rounding control (RC).
// Register-only form taking an extra AVX512RC:$rc operand that is matched as
// an (i32 timm) by OpNodeRnd.
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo _, X86VectorVTInfo _Src,
                                   SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<
                     opc, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc),
                     OpcodeStr,
                     "$rc, $src2, $src1", "$src1, $src2, $rc",
                     (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                      (_Src.VT _Src.RC:$src2),
                                      (i32 timm:$rc)))>,
                 EVEX_4V, VEX_LIG, Sched<[sched]>,
                 EVEX_B, EVEX_RC;
}
// Scalar double -> single conversion: the plain forms plus the
// rounding-control form, all defined under the "Z" name with HasAVX512.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
           avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                   OpNodeRnd, sched>,
           VEX_W, EVEX_CD8<64, CD8VT1>, XD;
}
7460
// Scalar single -> double conversion: the plain forms plus the SAE form,
// all defined under the "Z" name with HasAVX512.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
           avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src,
                                    OpNodeSAE, sched>,
           EVEX_CD8<32, CD8VT1>, XS;
}
// Scalar double <-> single conversions; both use opcode 0x5A.
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                            X86frounds, X86froundsRnd,
                                            WriteCvtSD2SS,
                                            f64x_info, f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                            X86fpexts, X86fpextsSAE,
                                            WriteCvtSS2SD,
                                            f32x_info, f64x_info>;
7477
// Selection patterns for the scalar conversions defined above.

// f32 -> f64 extend from a register; $src1 is left undefined.
let Predicates = [HasAVX512] in
def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>;

// f32 -> f64 extend with the load folded; only enabled with OptForSize.
let Predicates = [HasAVX512, OptForSize] in
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

let Predicates = [HasAVX512] in {
  // f64 -> f32 round from a register; $src1 is left undefined.
  def : Pat<(f32 (any_fpround FR64X:$src)),
            (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>;

  // Round element 0 of $src and insert it into $dst via X86Movss.
  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128X:$dst),
                     (v4f32 (scalar_to_vector
                       (f32 (any_fpround
                         (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
            (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>;

  // Extend element 0 of $src and insert it into $dst via X86Movsd.
  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128X:$dst),
                     (v2f64 (scalar_to_vector
                       (f64 (any_fpextend
                         (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
            (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>;
}
7502
7503//===----------------------------------------------------------------------===//
7504// AVX-512  Vector convert from signed/unsigned integer to float/double
7505//          and from float/double to signed/unsigned integer
7506//===----------------------------------------------------------------------===//
7507
// Packed conversion from _Src's vector type to _'s vector type.
// Defines three forms through AVX512_maskable_common, each with an explicit
// vselect-based masked pattern tied to $src0:
//   rr  - register source
//   rm  - full-vector memory source
//   rmb - broadcast memory source (tagged EVEX_B)
// Optional arguments:
//   Broadcast - text appended to "${src}" in the rmb asm string.
//   Alias     - text appended to the mnemonic of the rm form only.
//   MemOp     - memory operand used by the rm form.
//   MaskRC    - register class used for the $mask operand.
//   LdDAG     - pattern used for the rm form.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect MaskRC:$mask,
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched.Folded]>;

  // Use the single-'#' paste spelling, matching OpcodeStr#Alias above.
  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (_Src.BroadcastLdFrag addr:$src))
                            )),
                         (vselect MaskRC:$mask,
                                  (_.VT
                                   (OpNode
                                    (_Src.VT
                                     (_Src.BroadcastLdFrag addr:$src)))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE (suppress all exceptions).
// Register-only form; the asm string carries the {sae} operand and the
// record is tagged EVEX_B.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr,
                              X86VectorVTInfo _, X86VectorVTInfo _Src,
                              SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<
                 opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _Src.RC:$src),
                 OpcodeStr, "{sae}, $src", "$src, {sae}",
                 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
             EVEX, EVEX_B, Sched<[sched]>;
}
7568
// Conversion with rounding control (RC).
// Register-only form taking an extra AVX512RC:$rc operand that is matched as
// an (i32 timm) by OpNodeRnd.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr,
                             X86VectorVTInfo _, X86VectorVTInfo _Src,
                             SDNode OpNodeRnd, X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<
                 opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _Src.RC:$src, AVX512RC:$rc),
                 OpcodeStr, "$rc, $src", "$src, $rc",
                 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
             EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7580
// Same as avx512_vcvt_fp, except that the memory (rm) form is matched with the
// "extload" # _Src.VTName pattern fragment instead of OpNode applied to a
// plain load.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, X86VectorVTInfo _Src,
                                SDNode OpNode, X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "",
                                X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched,
                   Broadcast, Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7591
// Extend Float to Double.
// The 512-bit form also gets an SAE variant. The 128-bit form uses
// X86any_vfpext with an explicit "{1to2}" broadcast string and f64mem.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                any_fpextend, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                              X86vfpextSAE, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                     X86any_vfpext, sched.XMM,
                                     "{1to2}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
                                     any_fpextend, sched.YMM>,
                EVEX_V256;
  }
}
7608
// Truncate Double to Float.
// The 512-bit form also gets a rounding-control variant. The 128-bit form is
// instantiated with null_frag, so it carries no selection pattern of its own.
// The 128/256-bit memory forms get "{x}"/"{y}" mnemonic suffixes and explicit
// broadcast strings. The InstAlias records below add "x"/"y"-suffixed
// spellings for the register and broadcast forms.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the 128-bit register and broadcast forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the 256-bit register and broadcast forms.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7664
// Packed double <-> single conversions; both use opcode 0x5A.
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                 PS, EVEX_CD8<32, CD8VH>;
7669
let Predicates = [HasVLX] in {
  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.

  // Register source: unmasked, merge-masked, zero-masked.
  def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Full-vector load source: unmasked, merge-masked, zero-masked.
  def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  // Broadcast load source: unmasked, merge-masked, zero-masked.
  def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
            (VCVTPD2PSZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
7700
// Convert Signed/Unsigned Doubleword to Double.
// The enclosing let clears Uses and mayRaiseFPException for every record
// defined here. The 128-bit form uses OpNode128 and matches its memory form
// as a 64-bit scalar load bitcast to v4i32.
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info,
                            OpNode, sched.ZMM>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM,
                               "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128
                                 (bc_v4i32
                                   (v2i64
                                     (scalar_to_vector
                                       (loadi64 addr:$src))))))>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7721
// Convert Signed/Unsigned Doubleword to Float.
// The 512-bit form also gets a rounding-control variant.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info,
                            OpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info,
                               OpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7738
// Convert Float to Signed/Unsigned Doubleword with truncation.
// The 512-bit form also gets an SAE variant.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info,
                          OpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                              OpNodeSAE, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7755
// Convert Float to Signed/Unsigned Doubleword.
// The 512-bit form also gets a rounding-control variant.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info,
                          OpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7772
// Convert Double to Signed/Unsigned Doubleword with truncation.
// The 512-bit form also gets an SAE variant. The 128-bit form is instantiated
// with null_frag, so it carries no selection pattern of its own. The
// InstAlias records below add "x"/"y"-suffixed spellings for the register
// and broadcast forms.
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128 and
    // 256 memory forms of these instructions in the Asm Parser. They have the
    // same dest type - 'v4i32x_info'. We also specify the broadcast string
    // explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the 128-bit register and broadcast forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the 256-bit register and broadcast forms.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7836
// Convert Double to Signed/Unsigned Doubleword.
// The 512-bit form also gets a rounding-control variant. The 128-bit form is
// instantiated with null_frag, so it carries no selection pattern of its own.
// The InstAlias records below add "x"/"y"-suffixed spellings for the register
// and broadcast forms.
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128 and
    // 256 memory forms of these instructions in the Asm Parser. They have the
    // same dest type - 'v4i32x_info'. We also specify the broadcast string
    // explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // "x"-suffixed aliases for the 128-bit register and broadcast forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the 256-bit register and broadcast forms.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7898
// Convert Double to Signed/Unsigned Quadword.
// Requires HasDQI; the 512-bit form also gets a rounding-control variant.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info,
                          OpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info,
                               OpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7915
// Convert Double to Signed/Unsigned Quadword with truncation.
// Requires HasDQI. Note that the OpNodeRnd argument is forwarded to
// avx512_vcvt_fp_sae, i.e. it is used as the SAE node for the 512-bit form.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info,
                          OpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info,
                               OpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7932
// Convert Signed/Unsigned Quadword to Double.
// Requires HasDQI; the 512-bit form also gets a rounding-control variant.
// The 128/256-bit forms are tagged NotEVEX2VEXConvertible.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info,
                          OpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info,
                               OpNode, sched.XMM>,
                EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info,
                               OpNode, sched.YMM>,
                EVEX_V256, NotEVEX2VEXConvertible;
  }
}
7949
// Convert Float to Signed/Unsigned Quadword.
// Requires HasDQI; the 512-bit form also gets a rounding-control variant.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info,
                          OpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is given explicitly because only 2 elements of the
    // v4f32x_info source are used. The memory form is matched as a 64-bit
    // scalar load bitcast to v4f32.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info,
                               OpNode, sched.XMM,
                               "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode
                                 (bc_v4f32
                                   (v2f64
                                     (scalar_to_vector
                                       (loadf64 addr:$src))))))>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7972
// Convert Float to Signed/Unsigned Quadword with truncation.
// Requires HasDQI. Note that the OpNodeRnd argument is forwarded to
// avx512_vcvt_fp_sae, i.e. it is used as the SAE node for the 512-bit form.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info,
                          OpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is given explicitly because only 2 elements of the
    // v4f32x_info source are used. The memory form is matched as a 64-bit
    // scalar load bitcast to v4f32.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info,
                               OpNode, sched.XMM,
                               "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode
                                 (bc_v4f32
                                   (v2f64
                                     (scalar_to_vector
                                       (loadf64 addr:$src))))))>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                               OpNode, sched.YMM>,
                EVEX_V256;
  }
}
7994
7995// Convert Signed/Unsigned Quadword to Float
// Defines Z (EVEX_V512, HasDQI, plus an avx512_vcvt_fp_rc form using
// OpNodeRnd) and Z128/Z256 (HasDQI and HasVLX), followed by AT&T-syntax
// aliases that spell the "x"/"y" suffix explicitly.
7996multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7997                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7998  let Predicates = [HasDQI] in {
7999    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8000                            sched.ZMM>,
8001             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8002                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8003  }
8004  let Predicates = [HasDQI, HasVLX] in {
8005    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8006    // memory forms of these instructions in the Asm Parser. They have the same
8007    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8008    // for the same reason.
    // Z128 is instantiated with null_frag instead of OpNode, so it carries no
    // selection pattern of its own.
8009    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8010                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8011                               EVEX_V128, NotEVEX2VEXConvertible;
8012    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8013                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8014                               NotEVEX2VEXConvertible;
8015  }
8016
  // "att"-variant aliases with an explicit "x" suffix for the Z128 register
  // (rr/rrk/rrkz) and broadcast (rmb/rmbk/rmbkz) forms.
8017  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8018                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8019                  VR128X:$src), 0, "att">;
8020  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8021                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8022                  VK2WM:$mask, VR128X:$src), 0, "att">;
8023  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8024                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8025                  VK2WM:$mask, VR128X:$src), 0, "att">;
8026  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8027                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8028                  i64mem:$src), 0, "att">;
8029  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8030                  "$dst {${mask}}, ${src}{1to2}}",
8031                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8032                  VK2WM:$mask, i64mem:$src), 0, "att">;
8033  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8034                  "$dst {${mask}} {z}, ${src}{1to2}}",
8035                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8036                  VK2WM:$mask, i64mem:$src), 0, "att">;
8037
  // "att"-variant aliases with an explicit "y" suffix for the Z256 register
  // and broadcast forms; the destination is still VR128X.
8038  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8039                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8040                  VR256X:$src), 0, "att">;
8041  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8042                  "$dst {${mask}}, $src}",
8043                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8044                  VK4WM:$mask, VR256X:$src), 0, "att">;
8045  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8046                  "$dst {${mask}} {z}, $src}",
8047                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8048                  VK4WM:$mask, VR256X:$src), 0, "att">;
8049  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8050                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8051                  i64mem:$src), 0, "att">;
8052  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8053                  "$dst {${mask}}, ${src}{1to4}}",
8054                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8055                  VK4WM:$mask, i64mem:$src), 0, "att">;
8056  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8057                  "$dst {${mask}} {z}, ${src}{1to4}}",
8058                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8059                  VK4WM:$mask, i64mem:$src), 0, "att">;
8060}
8061
// Instantiations of the packed integer <-> floating-point conversion
// multiclasses. Each defm supplies the opcode byte, the mnemonic, the two
// selection nodes, the scheduling-class bundle, and then the encoding
// attributes (prefix class, optional VEX_W, EVEX_CD8 tuple).
8062defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP,
8063                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8064
8065defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
8066                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8067                                PS, EVEX_CD8<32, CD8VF>;
8068
8069defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8070                                X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8071                                XS, EVEX_CD8<32, CD8VF>;
8072
8073defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8074                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8075                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8076
8077defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8078                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8079                                 EVEX_CD8<32, CD8VF>;
8080
8081defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8082                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8083                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8084
8085defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8086                                  X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS,
8087                                  EVEX_CD8<32, CD8VH>;
8088
8089defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8090                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8091                                 EVEX_CD8<32, CD8VF>;
8092
8093defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8094                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8095                                 EVEX_CD8<32, CD8VF>;
8096
8097defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8098                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8099                                 VEX_W, EVEX_CD8<64, CD8VF>;
8100
8101defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8102                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8103                                 PS, EVEX_CD8<32, CD8VF>;
8104
8105defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8106                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8107                                 PS, EVEX_CD8<64, CD8VF>;
8108
8109defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8110                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8111                                 PD, EVEX_CD8<64, CD8VF>;
8112
8113defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8114                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8115                                 EVEX_CD8<32, CD8VH>;
8116
8117defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8118                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8119                                 PD, EVEX_CD8<64, CD8VF>;
8120
8121defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8122                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8123                                 EVEX_CD8<32, CD8VH>;
8124
8125defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8126                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8127                                 PD, EVEX_CD8<64, CD8VF>;
8128
8129defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8130                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8131                                 EVEX_CD8<32, CD8VH>;
8132
8133defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8134                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8135                                 PD, EVEX_CD8<64, CD8VF>;
8136
8137defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8138                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8139                                 EVEX_CD8<32, CD8VH>;
8140
8141defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8142                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8143                            EVEX_CD8<64, CD8VF>;
8144
8145defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8146                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8147                            EVEX_CD8<64, CD8VF>;
8148
8149defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8150                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8151                            EVEX_CD8<64, CD8VF>;
8152
8153defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8154                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8155                            EVEX_CD8<64, CD8VF>;
8156
// Selection patterns for the 128-bit v2f64 -> v4i32 conversions
// (VCVTPD2DQ, VCVTTPD2DQ, VCVTPD2UDQ, VCVTTPD2UDQ). Each group covers the
// register, full-load and 64-bit broadcast-load sources, each in unmasked,
// merge-masked (k) and zero-masked (kz) form.
8157let Predicates = [HasVLX] in {
8158  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8159  // patterns have been disabled with null_frag.
8160  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8161            (VCVTPD2DQZ128rr VR128X:$src)>;
8162  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8163                          VK2WM:$mask),
8164            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8165  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8166                          VK2WM:$mask),
8167            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8168
8169  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8170            (VCVTPD2DQZ128rm addr:$src)>;
8171  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8172                          VK2WM:$mask),
8173            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8174  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8175                          VK2WM:$mask),
8176            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8177
8178  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8179            (VCVTPD2DQZ128rmb addr:$src)>;
8180  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8181                          (v4i32 VR128X:$src0), VK2WM:$mask),
8182            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8183  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8184                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8185            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8186
8187  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8188  // patterns have been disabled with null_frag.
8189  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8190            (VCVTTPD2DQZ128rr VR128X:$src)>;
8191  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8192                          VK2WM:$mask),
8193            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8194  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8195                          VK2WM:$mask),
8196            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8197
8198  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8199            (VCVTTPD2DQZ128rm addr:$src)>;
8200  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8201                          VK2WM:$mask),
8202            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8203  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8204                          VK2WM:$mask),
8205            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8206
8207  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8208            (VCVTTPD2DQZ128rmb addr:$src)>;
8209  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8210                          (v4i32 VR128X:$src0), VK2WM:$mask),
8211            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8212  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8213                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8214            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8215
8216  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8217  // patterns have been disabled with null_frag.
8218  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8219            (VCVTPD2UDQZ128rr VR128X:$src)>;
8220  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8221                           VK2WM:$mask),
8222            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8223  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8224                           VK2WM:$mask),
8225            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8226
8227  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8228            (VCVTPD2UDQZ128rm addr:$src)>;
8229  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8230                           VK2WM:$mask),
8231            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8232  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8233                           VK2WM:$mask),
8234            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8235
8236  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8237            (VCVTPD2UDQZ128rmb addr:$src)>;
8238  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8239                           (v4i32 VR128X:$src0), VK2WM:$mask),
8240            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8241  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8242                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8243            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8244
8245  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8246  // patterns have been disabled with null_frag.
8247  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8248            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8249  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8250                          VK2WM:$mask),
8251            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8252  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8253                          VK2WM:$mask),
8254            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8255
8256  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8257            (VCVTTPD2UDQZ128rm addr:$src)>;
8258  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8259                          VK2WM:$mask),
8260            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8261  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8262                          VK2WM:$mask),
8263            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8264
8265  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8266            (VCVTTPD2UDQZ128rmb addr:$src)>;
8267  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8268                          (v4i32 VR128X:$src0), VK2WM:$mask),
8269            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8270  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8271                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8272            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8273}
8274
// Fold a 64-bit zero-extending vector load (X86vzload64), reinterpreted as
// v4f32, into the Z128 memory forms (rm/rmk/rmkz) of the f32 -> i64
// conversions. Masking is expressed with vselect on a VK2WM mask. Note that
// the masked truncating patterns use X86cvttp2si/X86cvttp2ui, whereas the
// unmasked ones use the X86any_* variants.
8275let Predicates = [HasDQI, HasVLX] in {
8276  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8277            (VCVTPS2QQZ128rm addr:$src)>;
8278  def : Pat<(v2i64 (vselect VK2WM:$mask,
8279                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8280                            VR128X:$src0)),
8281            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8282  def : Pat<(v2i64 (vselect VK2WM:$mask,
8283                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8284                            v2i64x_info.ImmAllZerosV)),
8285            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8286
8287  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8288            (VCVTPS2UQQZ128rm addr:$src)>;
8289  def : Pat<(v2i64 (vselect VK2WM:$mask,
8290                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8291                            VR128X:$src0)),
8292            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8293  def : Pat<(v2i64 (vselect VK2WM:$mask,
8294                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8295                            v2i64x_info.ImmAllZerosV)),
8296            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8297
8298  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8299            (VCVTTPS2QQZ128rm addr:$src)>;
8300  def : Pat<(v2i64 (vselect VK2WM:$mask,
8301                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8302                            VR128X:$src0)),
8303            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8304  def : Pat<(v2i64 (vselect VK2WM:$mask,
8305                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8306                            v2i64x_info.ImmAllZerosV)),
8307            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8308
8309  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8310            (VCVTTPS2UQQZ128rm addr:$src)>;
8311  def : Pat<(v2i64 (vselect VK2WM:$mask,
8312                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8313                            VR128X:$src0)),
8314            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8315  def : Pat<(v2i64 (vselect VK2WM:$mask,
8316                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8317                            v2i64x_info.ImmAllZerosV)),
8318            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8319}
8320
// Fold a 64-bit zero-extending vector load (X86vzload64), reinterpreted as
// v4i32, into the Z128 memory forms (rm/rmk/rmkz) of VCVTDQ2PD / VCVTUDQ2PD.
//
// The unmasked patterns match X86any_VSintToFP / X86any_VUintToFP. The
// vselect-masked patterns deliberately match only the plain X86VSintToFP /
// X86VUintToFP nodes, mirroring how the masked X86cvttp2si / X86cvttp2ui
// load-folding patterns in this file avoid the X86any_* fragments: a
// conversion that must observe strict FP semantics should not be folded into
// a write-masked instruction, because masking changes which elements are
// actually converted.
8321let Predicates = [HasVLX] in {
8322  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8323            (VCVTDQ2PDZ128rm addr:$src)>;
8324  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8326                            VR128X:$src0)),
8327            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8328  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8330                            v2f64x_info.ImmAllZerosV)),
8331            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8332
8333  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8334            (VCVTUDQ2PDZ128rm addr:$src)>;
8335  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8337                            VR128X:$src0)),
8338            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8339  def : Pat<(v2f64 (vselect VK2WM:$mask,
                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8341                            v2f64x_info.ImmAllZerosV)),
8342            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8343}
8344
// Selection patterns for the 128-bit v2i64 -> v4f32 conversions (VCVTQQ2PS,
// VCVTUQQ2PS). Each group covers register, full-load and 64-bit
// broadcast-load sources, in unmasked, merge-masked (k) and zero-masked (kz)
// form.
8345let Predicates = [HasDQI, HasVLX] in {
8346  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8347  // patterns have been disabled with null_frag.
8348  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8349            (VCVTQQ2PSZ128rr VR128X:$src)>;
8350  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8351                           VK2WM:$mask),
8352            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8353  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8354                           VK2WM:$mask),
8355            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8356
8357  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8358            (VCVTQQ2PSZ128rm addr:$src)>;
8359  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8360                           VK2WM:$mask),
8361            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8362  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8363                           VK2WM:$mask),
8364            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8365
8366  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8367            (VCVTQQ2PSZ128rmb addr:$src)>;
8368  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8369                           (v4f32 VR128X:$src0), VK2WM:$mask),
8370            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8371  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8372                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8373            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8374
8375  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8376  // patterns have been disabled with null_frag.
8377  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8378            (VCVTUQQ2PSZ128rr VR128X:$src)>;
8379  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8380                           VK2WM:$mask),
8381            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8382  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8383                           VK2WM:$mask),
8384            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8385
8386  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8387            (VCVTUQQ2PSZ128rm addr:$src)>;
8388  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8389                           VK2WM:$mask),
8390            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8391  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8392                           VK2WM:$mask),
8393            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8394
8395  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8396            (VCVTUQQ2PSZ128rmb addr:$src)>;
8397  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8398                           (v4f32 VR128X:$src0), VK2WM:$mask),
8399            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8400  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8401                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8402            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8403}
8404
8405//===----------------------------------------------------------------------===//
8406// Half precision conversion instructions
8407//===----------------------------------------------------------------------===//
8408
// Register (rr) and memory (rm) forms of vcvtph2ps, opcode 0x13, built on
// AVX512_maskable and matched through X86cvtph2ps.
//   _dest/_src - vector type info for the result and the source.
//   x86memop   - memory operand class for the rm form.
//   ld_frag    - load fragment used in the rm pattern.
//   sched      - scheduling class; the rm form uses sched.Folded.
// Both forms are marked as using MXCSR and as able to raise FP exceptions.
8409let Uses = [MXCSR], mayRaiseFPException = 1 in
8410multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8411                           X86MemOperand x86memop, PatFrag ld_frag,
8412                           X86FoldableSchedWrite sched> {
8413  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8414                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8415                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8416                            T8PD, Sched<[sched]>;
8417  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8418                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8419                            (X86cvtph2ps (_src.VT
8420                                          (ld_frag addr:$src)))>,
8421                            T8PD, Sched<[sched.Folded]>;
8422}
8423
// Register-only {sae} form (rrb) of vcvtph2ps, matched through
// X86cvtph2psSAE and encoded with EVEX_B. It is marked as using MXCSR but,
// unlike the rr/rm forms, mayRaiseFPException is not set here.
8424multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8425                               X86FoldableSchedWrite sched> {
8426  let Uses = [MXCSR] in
8427  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8428                             (ins _src.RC:$src), "vcvtph2ps",
8429                             "{sae}, $src", "$src, {sae}",
8430                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8431                             T8PD, EVEX_B, Sched<[sched]>;
8432}
8433
// EVEX_V512 vcvtph2ps: v16i16 source -> v16f32 result, with an f256mem memory
// operand, plus the {sae} register form. Requires HasAVX512.
8434let Predicates = [HasAVX512] in
8435  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8436                                    WriteCvtPH2PSZ>,
8437                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8438                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8439
// EVEX_V256 and EVEX_V128 vcvtph2ps forms (HasVLX). No {sae} form is
// instantiated here. The 128-bit form takes a v8i16 source but an f64mem
// memory operand, so extra patterns are added for 64-bit scalar loads.
8440let Predicates = [HasVLX] in {
8441  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8442                       load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8443                       EVEX_CD8<32, CD8VH>;
8444  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8445                       load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8446                       EVEX_CD8<32, CD8VH>;
8447
8448  // Pattern match vcvtph2ps of a scalar i64 load.
  // Two spellings are covered: a zero-extending 64-bit vector load
  // (X86vzload64) and scalar_to_vector of a plain i64 load.
8449  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
8450            (VCVTPH2PSZ128rm addr:$src)>;
8451  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8452              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8453            (VCVTPH2PSZ128rm addr:$src)>;
8454}
8455
// vcvtps2ph, opcode 0x1D, with an 8-bit immediate operand ($src2).
//   rr / rrk / rrkz - register destination: unmasked, merge-masked ($src0 is
//                     tied to $dst) and zero-masked; matched through
//                     X86cvtps2ph / X86mcvtps2ph.
//   mr / mrk        - memory destination: unmasked and masked; both have an
//                     empty pattern list and are marked mayStore.
//   RR / MR         - scheduling classes for the register and memory forms.
// All forms are in GenericDomain, use MXCSR and may raise FP exceptions.
8456multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8457                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8458let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8459  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8460             (ins _src.RC:$src1, i32u8imm:$src2),
8461             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8462             [(set _dest.RC:$dst,
8463                   (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8464             Sched<[RR]>;
8465  let Constraints = "$src0 = $dst" in
8466  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8467             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8468             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8469             [(set _dest.RC:$dst,
8470                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8471                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8472             Sched<[RR]>, EVEX_K;
8473  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8474             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8475             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8476             [(set _dest.RC:$dst,
8477                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8478                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8479             Sched<[RR]>, EVEX_KZ;
8480  let hasSideEffects = 0, mayStore = 1 in {
8481    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8482               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8483               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8484               Sched<[MR]>;
    // NOTE(review): mrk takes its mask class from _dest.KRCWM, while rrk and
    // rrkz above use _src.KRCWM - confirm the difference is intended.
8485    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8486               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8487               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8488                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8489  }
8490}
8491}
8492
// Register-only {sae} form (rrb) of vcvtps2ph. It is built with
// AVX512_maskable_in_asm and an empty pattern list, encoded with EVEX_B, and
// marked as using MXCSR with hasSideEffects = 0.
8493multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8494                               SchedWrite Sched> {
8495  let hasSideEffects = 0, Uses = [MXCSR] in
8496  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8497                   (outs _dest.RC:$dst),
8498                   (ins _src.RC:$src1, i32u8imm:$src2),
8499                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8500                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8501}
8502
// vcvtps2ph instantiations and store-folding patterns.
//   VCVTPS2PHZ             - EVEX_V512 form plus the {sae} form, HasAVX512.
//   VCVTPS2PHZ256 / Z128   - EVEX_V256 / EVEX_V128 forms, HasVLX.
// The store patterns fold a store of the conversion result into the
// memory-destination (mr) form of the matching width.
8503let Predicates = [HasAVX512] in {
8504  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8505                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8506                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8507                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8508  let Predicates = [HasVLX] in {
8509    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8510                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8511                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8512    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8513                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
8514                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8515  }
8516
  // The 128-bit and 256-bit store patterns select VCVTPS2PHZ128mr and
  // VCVTPS2PHZ256mr, which are only defined under HasVLX above. They must
  // carry the same predicate rather than inherit HasAVX512 from the outer
  // scope; otherwise a VLX-only instruction could be selected on a target
  // without VLX.
  let Predicates = [HasVLX] in {
8517  def : Pat<(store (f64 (extractelt
8518                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8519                         (iPTR 0))), addr:$dst),
8520            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8521  def : Pat<(store (i64 (extractelt
8522                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8523                         (iPTR 0))), addr:$dst),
8524            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8525  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8526            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
  }
  // The 512-bit store pattern only needs the enclosing HasAVX512 predicate.
8527  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8528            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8529}
8530
8531// Patterns for matching conversions from float to half-float and vice versa.
// Scalar values are moved into VR128X with COPY_TO_REGCLASS and converted with
// the 128-bit VCVTPS2PHZ128rr / VCVTPH2PSZ128rr instructions.
8532let Predicates = [HasVLX] in {
8533  // Use MXCSR.RC for rounding instead of explicitly specifying the default
8534  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8535  // configurations we support (the default). However, falling back to MXCSR is
8536  // more consistent with other instructions, which are always controlled by it.
8537  // It's encoded as 0b100.
  // f32 -> f16: convert, move the result to a GPR with VMOVPDI2DIZrr, and take
  // its sub_16bit subregister as the i16 result.
8538  def : Pat<(fp_to_f16 FR32X:$src),
8539            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8540              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
8541
  // f16 -> f32: the GR16 input goes through MOVSX32rr16 before being copied
  // into VR128X and converted.
8542  def : Pat<(f16_to_fp GR16:$src),
8543            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8544              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
8545
  // f32 -> f16 -> f32 round trip: both conversions are chained directly in
  // vector registers, with no GPR move in between.
8546  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8547            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8548              (v8i16 (VCVTPS2PHZ128rr
8549               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
8550}
8551
8552//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
//  Defines only the register-register {sae} form (rrb). The record has no
//  outputs and an empty selection pattern list.
//    opc       - opcode byte.
//    _         - type info; _.RC is the register class of both sources.
//    OpcodeStr - mnemonic placed in front of the operand string.
//    d         - execution domain; applied to the record through ExeDomain so
//                the parameter passed by every instantiation takes effect.
//    sched     - scheduling class, WriteFCom unless overridden.
8553multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8554                            string OpcodeStr, Domain d,
8555                            X86FoldableSchedWrite sched = WriteFCom> {
8556  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
8557  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8558                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8559                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8560}
8561
// {sae} register forms of the scalar compares, instantiated from
// avx512_ord_cmp_sae: opcode 0x2E for vucomiss/vucomisd and 0x2F for
// vcomiss/vcomisd. All are marked as defining EFLAGS and require HasAVX512.
8562let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8563  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8564                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8565  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8566                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8567  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8568                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8569  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8570                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8571}
8572
// Non-{sae} forms of the scalar compares, instantiated from sse12_ord_cmp and
// sse12_ord_cmp_int (both defined outside this section). All are marked as
// defining EFLAGS and require HasAVX512.
8573let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register forms on FR32X/FR64X: the ucomis* entries use
  // X86any_fcmp, the comis* entries use X86strict_fcmps.
8574  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8575                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8576                                 EVEX_CD8<32, CD8VT1>;
8577  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8578                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
8579                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8580  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8581                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8582                                 EVEX_CD8<32, CD8VT1>;
8583  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8584                                 "comisd", SSEPackedDouble>, PD, EVEX,
8585                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Codegen-only forms on VR128X, selecting the X86ucomi / X86comi nodes with
  // the ssmem/sdmem operands and sse_load_f32/sse_load_f64 fragments.
8586  let isCodeGenOnly = 1 in {
8587    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8588                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8589                          EVEX_CD8<32, CD8VT1>;
8590    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8591                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8592                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8593
8594    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8595                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8596                          EVEX_CD8<32, CD8VT1>;
8597    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8598                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8599                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8600  }
8601}
8602
8603/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Two-source maskable scalar forms under HasAVX512:
///   rr - both sources in _.RC registers.
///   rm - second source is an _.IntScalarMemOp, matched with
///        _.ScalarIntMemCPat and scheduled as a folded load.
/// OpNode is applied to ($src1, $src2) in both forms.
8604multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8605                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8606  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8607  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8608                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8609                           "$src2, $src1", "$src1, $src2",
8610                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8611                           EVEX_4V, VEX_LIG, Sched<[sched]>;
8612  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8613                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8614                         "$src2, $src1", "$src1, $src2",
8615                         (OpNode (_.VT _.RC:$src1),
8616                          _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
8617                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8618}
8619}
8620
// Scalar instantiations of avx512_fp14_s: vrcp14ss/sd use opcode 0x4D with
// X86rcp14s, vrsqrt14ss/sd use opcode 0x4F with X86rsqrt14s. The sd variants
// additionally carry VEX_W and a 64-bit EVEX_CD8.
8621defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8622                               f32x_info>, EVEX_CD8<32, CD8VT1>,
8623                               T8PD;
8624defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8625                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8626                               T8PD;
8627defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8628                                 SchedWriteFRsqrt.Scl, f32x_info>,
8629                                 EVEX_CD8<32, CD8VT1>, T8PD;
8630defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8631                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8632                                 EVEX_CD8<64, CD8VT1>, T8PD;
8633
8634/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Single-source maskable packed forms:
///   r  - source in an _.RC register.
///   m  - source loaded through _.LdFrag from an _.MemOp.
///   mb - source loaded through _.BroadcastLdFrag from an _.ScalarMemOp; the
///        asm operand string is "${src}" followed by _.BroadcastStr.
8635multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8636                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8637  let ExeDomain = _.ExeDomain in {
8638  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8639                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8640                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8641                         Sched<[sched]>;
8642  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8643                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8644                         (OpNode (_.VT
8645                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8646                         Sched<[sched.Folded, sched.ReadAfterFold]>;
8647  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8648                          (ins _.ScalarMemOp:$src), OpcodeStr,
8649                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8650                          (OpNode (_.VT
8651                            (_.BroadcastLdFrag addr:$src)))>,
8652                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8653  }
8654}
8655
// Instantiates avx512_fp14_p for every vector width: 512-bit ps/pd forms
// unconditionally, 128/256-bit forms under HasVLX. The "ps"/"pd" suffix is
// appended to OpcodeStr, and sched.ZMM/.YMM/.XMM is chosen per width. The
// enclosing let applies Uses = [MXCSR] to the whole multiclass.
8656let Uses = [MXCSR] in
8657multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8658                                X86SchedWriteWidths sched> {
8659  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8660                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8661  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8662                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8663
8664  // Define only if AVX512VL feature is present.
8665  let Predicates = [HasVLX] in {
8666    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8667                                OpNode, sched.XMM, v4f32x_info>,
8668                               EVEX_V128, EVEX_CD8<32, CD8VF>;
8669    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8670                                OpNode, sched.YMM, v8f32x_info>,
8671                               EVEX_V256, EVEX_CD8<32, CD8VF>;
8672    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8673                                OpNode, sched.XMM, v2f64x_info>,
8674                               EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8675    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8676                                OpNode, sched.YMM, v4f64x_info>,
8677                               EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8678  }
8679}
8680
// Packed instantiations: vrsqrt14 uses opcode 0x4E with X86rsqrt14, vrcp14
// uses opcode 0x4C with X86rcp14.
8681defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8682defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8683
8684/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Two-source maskable scalar forms:
///   r  - register sources, selects OpNode; tagged SIMD_EXC.
///   rb - register sources with "{sae}" in the asm string, selects OpNodeSAE;
///        tagged EVEX_B and not SIMD_EXC.
///   m  - second source is an _.IntScalarMemOp matched with
///        _.ScalarIntMemCPat, selects OpNode; tagged SIMD_EXC.
8685multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8686                         SDNode OpNode, SDNode OpNodeSAE,
8687                         X86FoldableSchedWrite sched> {
8688  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8689  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8690                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8691                           "$src2, $src1", "$src1, $src2",
8692                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8693                           Sched<[sched]>, SIMD_EXC;
8694
8695  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8696                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8697                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8698                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8699                            EVEX_B, Sched<[sched]>;
8700
8701  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8702                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8703                         "$src2, $src1", "$src1, $src2",
8704                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
8705                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8706  }
8707}
8708
// Instantiates avx512_fp28_s for both scalar element types: SSZ appends "ss"
// and uses f32x_info, SDZ appends "sd", uses f64x_info and adds VEX_W.
8709multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8710                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8711  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8712                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8713  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8714                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8715}
8716
// Scalar instantiations of avx512_eri_s. vrcp28 (0xCB) and vrsqrt28 (0xCD)
// are guarded by HasERI; vgetexp (0x43) is defined outside that guard.
8717let Predicates = [HasERI] in {
8718  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8719                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8720  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8721                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8722}
8723
8724defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8725                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8726/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Single-source maskable packed forms, all with Uses = [MXCSR] and
/// mayRaiseFPException = 1:
///   r  - source in an _.RC register.
///   m  - source loaded through _.LdFrag from an _.MemOp.
///   mb - source loaded through _.BroadcastLdFrag from an _.ScalarMemOp;
///        tagged EVEX_B, asm operand is "${src}" followed by _.BroadcastStr.
8727
8728multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8729                         SDNode OpNode, X86FoldableSchedWrite sched> {
8730  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8731  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8732                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8733                         (OpNode (_.VT _.RC:$src))>,
8734                         Sched<[sched]>;
8735
8736  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8737                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8738                         (OpNode (_.VT
8739                             (bitconvert (_.LdFrag addr:$src))))>,
8740                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8741
8742  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8743                         (ins _.ScalarMemOp:$src), OpcodeStr,
8744                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8745                         (OpNode (_.VT
8746                                  (_.BroadcastLdFrag addr:$src)))>,
8747                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8748  }
8749}
// Register-only "{sae}" companion to avx512_fp28_p. Defines a single maskable
// rb form tagged EVEX_B; callers pass the SAE node as OpNode. Unlike
// avx512_fp28_p, this let does not set mayRaiseFPException.
8750multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8751                         SDNode OpNode, X86FoldableSchedWrite sched> {
8752  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8753  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8754                        (ins _.RC:$src), OpcodeStr,
8755                        "{sae}, $src", "$src, {sae}",
8756                        (OpNode (_.VT _.RC:$src))>,
8757                        EVEX_B, Sched<[sched]>;
8758}
8759
// 512-bit packed forms only: each of PSZ/PDZ combines avx512_fp28_p (using
// OpNode) with avx512_fp28_p_sae (using OpNodeSAE) on v16f32_info/v8f64_info.
// No 128/256-bit variants are defined here.
8760multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8761                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8762   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8763              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8764              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8765   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8766              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8767              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8768}
8769
// 128/256-bit packed forms built from avx512_fp28_p, all under HasVLX. There
// is no "{sae}" form and no 512-bit form in this multiclass.
8770multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8771                                  SDNode OpNode, X86SchedWriteWidths sched> {
8772  // Define only if AVX512VL feature is present.
8773  let Predicates = [HasVLX] in {
8774    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8775                                sched.XMM>,
8776                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8777    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8778                                sched.YMM>,
8779                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8780    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8781                                sched.XMM>,
8782                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8783    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8784                                sched.YMM>,
8785                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8786  }
8787}
8788
// Packed instantiations of avx512_eri. vrsqrt28 (0xCC), vrcp28 (0xCA) and
// vexp2 (0xC8) are guarded by HasERI and get 512-bit forms only. vgetexp
// (0x42) is defined outside that guard and also pulls in
// avx512_fp_unaryop_packed for the 128/256-bit forms.
8789let Predicates = [HasERI] in {
8790 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8791                            SchedWriteFRsqrt>, EVEX;
8792 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8793                            SchedWriteFRcp>, EVEX;
8794 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8795                            SchedWriteFAdd>, EVEX;
8796}
8797defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8798                            SchedWriteFRnd>,
8799                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8800                                          SchedWriteFRnd>, EVEX;
8801
// Packed square root with an explicit rounding operand. Defines a single
// maskable register form rb that takes AVX512RC:$rc and selects
// X86fsqrtRnd with that operand as an i32 timm; tagged EVEX_B and EVEX_RC.
8802multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8803                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8804  let ExeDomain = _.ExeDomain in
8805  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8806                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8807                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8808                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8809}
8810
// Packed square root selecting any_fsqrt, with Uses = [MXCSR] and
// mayRaiseFPException = 1 on every form:
//   r  - source in an _.RC register.
//   m  - source loaded through _.LdFrag from an _.MemOp.
//   mb - source loaded through _.BroadcastLdFrag from an _.ScalarMemOp;
//        tagged EVEX_B, asm operand is "${src}" followed by _.BroadcastStr.
8811multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8812                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8813  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8814  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8815                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8816                         (_.VT (any_fsqrt _.RC:$src))>, EVEX,
8817                         Sched<[sched]>;
8818  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8819                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8820                         (any_fsqrt (_.VT
8821                           (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8822                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8823  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8824                          (ins _.ScalarMemOp:$src), OpcodeStr,
8825                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8826                          (any_fsqrt (_.VT
8827                            (_.BroadcastLdFrag addr:$src)))>,
8828                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8829  }
8830}
8831
// Instantiates avx512_sqrt_packed for every vector width: 512-bit ps/pd forms
// unconditionally, 128/256-bit forms under HasVLX. ps forms take their
// scheduling class from sched.PS and carry the PS prefix class; pd forms use
// sched.PD, the PD prefix class and VEX_W.
8832let Uses = [MXCSR], mayRaiseFPException = 1 in
8833multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8834                                  X86SchedWriteSizes sched> {
8835  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8836                                sched.PS.ZMM, v16f32_info>,
8837                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8838  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8839                                sched.PD.ZMM, v8f64_info>,
8840                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8841  // Define only if AVX512VL feature is present.
8842  let Predicates = [HasVLX] in {
8843    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8844                                     sched.PS.XMM, v4f32x_info>,
8845                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8846    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8847                                     sched.PS.YMM, v8f32x_info>,
8848                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8849    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8850                                     sched.PD.XMM, v2f64x_info>,
8851                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8852    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8853                                     sched.PD.YMM, v4f64x_info>,
8854                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8855  }
8856}
8857
// Instantiates avx512_sqrt_packed_round for the 512-bit ps and pd types only;
// no 128/256-bit rounding forms are defined. The enclosing let applies
// Uses = [MXCSR].
8858let Uses = [MXCSR] in
8859multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8860                                        X86SchedWriteSizes sched> {
8861  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8862                                      sched.PS.ZMM, v16f32_info>,
8863                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8864  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8865                                      sched.PD.ZMM, v8f64_info>,
8866                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8867}
8868
// Scalar square root. Defines:
//   r_Int / m_Int - maskable forms on _.RC selecting X86fsqrts; m_Int takes
//                   an _.IntScalarMemOp matched with _.ScalarIntMemCPat.
//   rb_Int        - maskable register form with an AVX512RC:$rc operand,
//                   selecting X86fsqrtRnds; tagged EVEX_B and EVEX_RC.
//   r / m         - codegen-only forms on _.FRC with empty pattern lists;
//                   they are selected by the Pat<> entries at the end.
// Name is the record-name prefix used to look up the r/m instructions as
// Name#Zr and Name#Zm.
8869multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8870                              X86VectorVTInfo _, string Name> {
8871  let ExeDomain = _.ExeDomain in {
8872    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8873                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8874                         "$src2, $src1", "$src1, $src2",
8875                         (X86fsqrts (_.VT _.RC:$src1),
8876                                    (_.VT _.RC:$src2))>,
8877                         Sched<[sched]>, SIMD_EXC;
8878    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8879                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8880                         "$src2, $src1", "$src1, $src2",
8881                         (X86fsqrts (_.VT _.RC:$src1),
8882                                    _.ScalarIntMemCPat:$src2)>,
8883                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8884    let Uses = [MXCSR] in
8885    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8886                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8887                         "$rc, $src2, $src1", "$src1, $src2, $rc",
8888                         (X86fsqrtRnds (_.VT _.RC:$src1),
8889                                     (_.VT _.RC:$src2),
8890                                     (i32 timm:$rc))>,
8891                         EVEX_B, EVEX_RC, Sched<[sched]>;
8892
8893    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8894      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8895                (ins _.FRC:$src1, _.FRC:$src2),
8896                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8897                Sched<[sched]>, SIMD_EXC;
8898      let mayLoad = 1 in
8899        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8900                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8901                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8902                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8903    }
8904  }
8905
  // any_fsqrt on a scalar register selects the r form; $src1 is filled with
  // IMPLICIT_DEF and the value becomes $src2.
8906  let Predicates = [HasAVX512] in {
8907    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
8908              (!cast<Instruction>(Name#Zr)
8909                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
8910  }
8911
  // any_fsqrt of a load selects the m form, but only under OptForSize.
8912  let Predicates = [HasAVX512, OptForSize] in {
8913    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
8914              (!cast<Instruction>(Name#Zm)
8915                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
8916  }
8917}
8918
// Instantiates avx512_sqrt_scalar for both scalar element types. SSZ appends
// "ss", uses f32x_info and the XS prefix class; SDZ appends "sd", uses
// f64x_info, the XD prefix class and VEX_W. NAME#"SS"/NAME#"SD" is passed as
// the Name argument used for the Name#Zr / Name#Zm lookups.
8919multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8920                                  X86SchedWriteSizes sched> {
8921  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8922                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8923  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
8924                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
8925}
8926
// vsqrt instantiations, all with opcode 0x51: the packed forms plus their
// 512-bit rounding forms, then the scalar forms (tagged VEX_LIG).
8927defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
8928             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
8929
8930defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
8931
// Scalar rndscale with an i32u8imm control operand ($src3). Defines:
//   r_Int  - maskable register form selecting X86RndScales.
//   rb_Int - maskable register form with "{sae}" in the asm string, selecting
//            X86RndScalesSAE; tagged EVEX_B.
//   m_Int  - maskable form whose second source is an _.IntScalarMemOp matched
//            with _.ScalarIntMemCPat, selecting X86RndScales.
//   r / m  - codegen-only forms on _.FRC with empty pattern lists; they are
//            selected by the Pat<> entries at the end.
8932multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8933                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8934  let ExeDomain = _.ExeDomain in {
8935  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8936                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8937                           "$src3, $src2, $src1", "$src1, $src2, $src3",
8938                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8939                           (i32 timm:$src3)))>,
8940                           Sched<[sched]>, SIMD_EXC;
8941
8942  let Uses = [MXCSR] in
8943  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8944                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8945                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
8946                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8947                         (i32 timm:$src3)))>, EVEX_B,
8948                         Sched<[sched]>;
8949
8950  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8951                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
8952                         OpcodeStr,
8953                         "$src3, $src2, $src1", "$src1, $src2, $src3",
8954                         (_.VT (X86RndScales _.RC:$src1,
8955                                _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
8956                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8957
8958  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
8959    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8960               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8961               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8962               []>, Sched<[sched]>, SIMD_EXC;
8963
8964    let mayLoad = 1 in
8965      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8966                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8967                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8968                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8969  }
8970  }
8971
  // X86any_VRndScale on a scalar register selects the r form; the first
  // instruction operand is filled with IMPLICIT_DEF.
8972  let Predicates = [HasAVX512] in {
8973    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
8974              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8975               _.FRC:$src1, timm:$src2))>;
8976  }
8977
  // X86any_VRndScale of a _.ScalarLdFrag load selects the m form, but only
  // under OptForSize.
8978  let Predicates = [HasAVX512, OptForSize] in {
8979    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
8980              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8981               addr:$src1, timm:$src2))>;
8982  }
8983}
8984
// Scalar rndscale instantiations: vrndscaless uses opcode 0x0A with
// f32x_info, vrndscalesd uses opcode 0x0B with f64x_info and VEX_W.
8985defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
8986                                           SchedWriteFRnd.Scl, f32x_info>,
8987                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
8988                                           EVEX_CD8<32, CD8VT1>;
8989
8990defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
8991                                           SchedWriteFRnd.Scl, f64x_info>,
8992                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
8993                                           EVEX_CD8<64, CD8VT1>;
8994
// Selection patterns that fold a masked scalar unary op into the masked
// "V"#OpcPrefix#r_Intk / r_Intkz instructions.
//   OpNode        - scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction name without the leading "V".
//   Move          - node that inserts the selected scalar into $src1.
//   Mask          - dag matched as the X86selects condition.
//   _             - vector type info for the operands.
//   ZeroFP        - leaf matched as the "else" value in the zeroing pattern.
//   OutMask       - dag emitted as the instruction's mask operand.
//   BasePredicate - predicate guarding both patterns.
8995multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
8996                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
8997                                dag OutMask, Predicate BasePredicate> {
8998  let Predicates = [BasePredicate] in {
    // Merge-masking: the "else" value is element 0 of $dst.
8999    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9000               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9001               (extractelt _.VT:$dst, (iPTR 0))))),
9002              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9003               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9004
    // Zero-masking: the "else" value is ZeroFP.
9005    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9006               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9007               ZeroFP))),
9008              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9009               OutMask, _.VT:$src2, _.VT:$src1)>;
9010  }
9011}
9012
// Masked scalar fsqrt patterns for SQRTSSZ (X86Movss, v4f32x_info, fp32imm0)
// and SQRTSDZ (X86Movsd, v2f64x_info, fp64imm0). The matched mask is a GR32
// truncated to i8 and converted to v1i1; it is emitted as $mask copied to
// VK1WM.
9013defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9014                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9015                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9016defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9017                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9018                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9019
9020
9021//-------------------------------------------------
9022// Integer truncate and extend operations
9023//-------------------------------------------------
9024
9025// PatFrags that contain a select and a truncate op. They take operands in the
9026// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9027// either to the multiclasses.
// Each fragment expands to (vselect $mask, (<truncate> $src), $src0), where
// the truncate is trunc, X86vtruncs or X86vtruncus respectively.
9028def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9029                           (vselect node:$mask,
9030                                    (trunc node:$src), node:$src0)>;
9031def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9032                            (vselect node:$mask,
9033                                     (X86vtruncs node:$src), node:$src0)>;
9034def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9035                             (vselect node:$mask,
9036                                      (X86vtruncus node:$src), node:$src0)>;
9037
// Instruction forms shared by all truncating moves from SrcInfo to DestInfo:
//   rr   - unmasked register form, selects OpNode.
//   rrk  - merge-masked register form; $src0 is tied to $dst and MaskNode is
//          applied to ($src, $src0, $mask).
//   rrkz - zero-masked register form; MaskNode is applied with
//          DestInfo.ImmAllZerosV as the pass-through value.
//   mr   - unmasked store form to x86memop, no selection pattern.
//   mrk  - masked store form, no selection pattern; tagged NotMemoryFoldable.
// The mask register class comes from SrcInfo (SrcInfo.KRCWM).
9038multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9039                              SDPatternOperator MaskNode,
9040                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9041                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9042  let ExeDomain = DestInfo.ExeDomain in {
9043  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9044             (ins SrcInfo.RC:$src),
9045             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9046             [(set DestInfo.RC:$dst,
9047                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9048             EVEX, Sched<[sched]>;
9049  let Constraints = "$src0 = $dst" in
9050  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9051             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9052             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9053             [(set DestInfo.RC:$dst,
9054                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9055                             (DestInfo.VT DestInfo.RC:$src0),
9056                             SrcInfo.KRCWM:$mask))]>,
9057             EVEX, EVEX_K, Sched<[sched]>;
9058  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9059             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9060             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9061             [(set DestInfo.RC:$dst,
9062                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9063                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9064             EVEX, EVEX_KZ, Sched<[sched]>;
9065  }
9066
9067  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9068    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9069               (ins x86memop:$dst, SrcInfo.RC:$src),
9070               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9071               EVEX, Sched<[sched.Folded]>;
9072
9073    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9074               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9075               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9076               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9077  }//mayStore = 1, hasSideEffects = 0
9078}
9079
// Selection patterns for the store forms of a truncating move. truncFrag maps
// to the mr instruction and mtruncFrag (which also carries a SrcInfo.KRCWM
// mask) maps to mrk. The instruction is looked up by name as
// Name # SrcInfo.ZSuffix # "mr" / "mrk". DestInfo is not referenced in the
// body.
9080multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9081                                    X86VectorVTInfo DestInfo,
9082                                    PatFrag truncFrag, PatFrag mtruncFrag,
9083                                    string Name> {
9084
9085  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9086            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9087                                    addr:$dst, SrcInfo.RC:$src)>;
9088
9089  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9090                        SrcInfo.KRCWM:$mask),
9091            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9092                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9093}
9094
// Instantiates avx512_trunc_common plus avx512_trunc_mr_lowering for all
// three source widths of VTSrcInfo. Z128 and Z256 require [HasVLX, prd];
// Z requires [prd] only. Each width gets its own OpNode, MaskNode,
// destination type info and memory operand; truncFrag/mtruncFrag are shared.
// prd defaults to HasAVX512.
9095multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9096                        SDNode OpNode256, SDNode OpNode512,
9097                        SDPatternOperator MaskNode128,
9098                        SDPatternOperator MaskNode256,
9099                        SDPatternOperator MaskNode512,
9100                        X86FoldableSchedWrite sched,
9101                        AVX512VLVectorVTInfo VTSrcInfo,
9102                        X86VectorVTInfo DestInfoZ128,
9103                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9104                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9105                        X86MemOperand x86memopZ, PatFrag truncFrag,
9106                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9107
9108  let Predicates = [HasVLX, prd] in {
9109    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9110                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9111                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9112                             truncFrag, mtruncFrag, NAME>, EVEX_V128;
9113
9114    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9115                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9116                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9117                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
9118  }
9119  let Predicates = [prd] in
9120    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9121                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9122                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9123                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
9124}
9125
// Quadword -> byte truncate family (source avx512vl_i64_info).
// The destination is v16i8x_info at every vector length and the store forms
// use i16mem / i32mem / i64mem for the 128 / 256 / 512-bit sources.
// All three lengths are given InVecNode / InVecMaskNode; OpNode and MaskNode
// are accepted but not referenced here (presumably kept so the signature
// matches the sibling avx512_trunc_* wrappers - confirm before removing).
9126multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9127                           SDPatternOperator MaskNode,
9128                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9129                           PatFrag MaskedStoreNode, SDNode InVecNode,
9130                           SDPatternOperator InVecMaskNode> {
9131  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9132                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9133                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9134                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9135                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9136}
9137
// Quadword -> word truncate family (source avx512vl_i64_info).
// The destination is v8i16x_info at every vector length; store forms use
// i32mem / i64mem / i128mem for the 128 / 256 / 512-bit sources.
// The 128- and 256-bit variants use InVecNode / InVecMaskNode; only the
// 512-bit variant uses OpNode / MaskNode.
9138multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9139                           SDPatternOperator MaskNode,
9140                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9141                           PatFrag MaskedStoreNode, SDNode InVecNode,
9142                           SDPatternOperator InVecMaskNode> {
9143  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9144                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9145                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9146                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9147                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9148}
9149
// Quadword -> doubleword truncate family (source avx512vl_i64_info).
// Destinations are v4i32x_info (128- and 256-bit sources) and v8i32x_info
// (512-bit source); store forms use i64mem / i128mem / i256mem.
// Only the 128-bit variant uses InVecNode / InVecMaskNode; the 256- and
// 512-bit variants use OpNode / MaskNode.
9150multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9151                           SDPatternOperator MaskNode,
9152                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9153                           PatFrag MaskedStoreNode, SDNode InVecNode,
9154                           SDPatternOperator InVecMaskNode> {
9155  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9156                          InVecMaskNode, MaskNode, MaskNode, sched,
9157                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9158                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9159                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9160}
9161
// Doubleword -> byte truncate family (source avx512vl_i32_info).
// The destination is v16i8x_info at every vector length; store forms use
// i32mem / i64mem / i128mem for the 128 / 256 / 512-bit sources.
// The 128- and 256-bit variants use InVecNode / InVecMaskNode; only the
// 512-bit variant uses OpNode / MaskNode.
9162multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9163                           SDPatternOperator MaskNode,
9164                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9165                           PatFrag MaskedStoreNode, SDNode InVecNode,
9166                           SDPatternOperator InVecMaskNode> {
9167  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9168                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9169                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9170                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9171                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9172}
9173
// Doubleword -> word truncate family (source avx512vl_i32_info).
// Destinations are v8i16x_info (128- and 256-bit sources) and v16i16x_info
// (512-bit source); store forms use i64mem / i128mem / i256mem.
// Only the 128-bit variant uses InVecNode / InVecMaskNode; the 256- and
// 512-bit variants use OpNode / MaskNode.
9174multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9175                           SDPatternOperator MaskNode,
9176                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9177                           PatFrag MaskedStoreNode, SDNode InVecNode,
9178                           SDPatternOperator InVecMaskNode> {
9179  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9180                          InVecMaskNode, MaskNode, MaskNode, sched,
9181                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9182                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9183                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9184}
9185
// Word -> byte truncate family (source avx512vl_i16_info).
// Destinations are v16i8x_info (128- and 256-bit sources) and v32i8x_info
// (512-bit source); store forms use i64mem / i128mem / i256mem.
// Only the 128-bit variant uses InVecNode / InVecMaskNode; the 256- and
// 512-bit variants use OpNode / MaskNode.
// Unlike the other avx512_trunc_* wrappers, this one overrides the base
// predicate of avx512_trunc with HasBWI.
9186multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9187                           SDPatternOperator MaskNode,
9188                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9189                           PatFrag MaskedStoreNode, SDNode InVecNode,
9190                           SDPatternOperator InVecMaskNode> {
9191  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9192                          InVecMaskNode, MaskNode, MaskNode, sched,
9193                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9194                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9195                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9196}
9197
// Truncate instruction instantiations. Each source/destination width pair
// gets three families: VPMOVxy (trunc / X86vtrunc), VPMOVSxy (X86vtruncs) and
// VPMOVUSxy (X86vtruncus). Going by the mnemonics, the S / US forms are the
// signed- and unsigned-saturating variants. All use the WriteShuffle256
// scheduling class. Argument order is:
//   opcode, mnemonic, OpNode, MaskNode, sched, StoreNode, MaskedStoreNode,
//   InVecNode, InVecMaskNode.

// Quadword -> byte.
9198defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
9199                                  WriteShuffle256, truncstorevi8,
9200                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9201defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
9202                                  WriteShuffle256, truncstore_s_vi8,
9203                                  masked_truncstore_s_vi8, X86vtruncs,
9204                                  X86vmtruncs>;
9205defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9206                                  select_truncus, WriteShuffle256,
9207                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9208                                  X86vtruncus, X86vmtruncus>;
9209
// Quadword -> word.
9210defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9211                                  WriteShuffle256, truncstorevi16,
9212                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9213defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9214                                  WriteShuffle256, truncstore_s_vi16,
9215                                  masked_truncstore_s_vi16, X86vtruncs,
9216                                  X86vmtruncs>;
9217defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9218                                  select_truncus, WriteShuffle256,
9219                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9220                                  X86vtruncus, X86vmtruncus>;
9221
// Quadword -> doubleword.
9222defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9223                                  WriteShuffle256, truncstorevi32,
9224                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9225defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9226                                  WriteShuffle256, truncstore_s_vi32,
9227                                  masked_truncstore_s_vi32, X86vtruncs,
9228                                  X86vmtruncs>;
9229defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9230                                  select_truncus, WriteShuffle256,
9231                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9232                                  X86vtruncus, X86vmtruncus>;
9233
// Doubleword -> byte.
9234defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9235                                  WriteShuffle256, truncstorevi8,
9236                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9237defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9238                                  WriteShuffle256, truncstore_s_vi8,
9239                                  masked_truncstore_s_vi8, X86vtruncs,
9240                                  X86vmtruncs>;
9241defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9242                                  select_truncus, WriteShuffle256,
9243                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9244                                  X86vtruncus, X86vmtruncus>;
9245
// Doubleword -> word.
9246defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9247                                  WriteShuffle256, truncstorevi16,
9248                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9249defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9250                                  WriteShuffle256, truncstore_s_vi16,
9251                                  masked_truncstore_s_vi16, X86vtruncs,
9252                                  X86vmtruncs>;
9253defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9254                                  select_truncus, WriteShuffle256,
9255                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9256                                  X86vtruncus, X86vmtruncus>;
9257
// Word -> byte (avx512_trunc_wb gates these on HasBWI).
9258defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9259                                  WriteShuffle256, truncstorevi8,
9260                                  masked_truncstorevi8, X86vtrunc,
9261                                  X86vmtrunc>;
9262defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9263                                  WriteShuffle256, truncstore_s_vi8,
9264                                  masked_truncstore_s_vi8, X86vtruncs,
9265                                  X86vmtruncs>;
9266defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9267                                  select_truncus, WriteShuffle256,
9268                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9269                                  X86vtruncus, X86vmtruncus>;
9270
// Without VLX the 256-bit truncate instructions (Z256, gated on HasVLX in
// avx512_trunc) are not available. Instead, place the 256-bit source in the
// low half of a 512-bit register whose remaining bits are IMPLICIT_DEF, run
// the 512-bit instruction, and extract the low xmm of the result.
9271let Predicates = [HasAVX512, NoVLX] in {
// v8i32 -> v8i16 via the 512-bit VPMOVDW.
9272def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9273         (v8i16 (EXTRACT_SUBREG
9274                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9275                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
// v4i64 -> v4i32 via the 512-bit VPMOVQD.
9276def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9277         (v4i32 (EXTRACT_SUBREG
9278                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9279                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9280}
9281
// Same widening trick for v16i16 -> v16i8 when BWI is available but VLX is
// not: widen the source into a 512-bit register (upper half IMPLICIT_DEF),
// use the 512-bit VPMOVWB, and extract the low xmm of the result.
9282let Predicates = [HasBWI, NoVLX] in {
9283def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9284         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9285                                            VR256X:$src, sub_ymm))), sub_xmm))>;
9286}
9287
9288// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps a masked-truncate node onto the masked register forms of a truncate
// instruction.
//   InstrName - instruction name including the vector-length suffix; "rrk"
//               and "rrkz" are appended to find the instruction records.
//   OpNode    - masked truncate node taking (source, passthru, mask).
//   DestInfo  - type info of the truncated result / passthru operand.
//   SrcInfo   - type info of the source vector; also supplies the mask class.
9289multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9290                           X86VectorVTInfo DestInfo,
9291                           X86VectorVTInfo SrcInfo> {
  // Merge-masking: passthru register $src0 -> "rrk".
9292  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9293                                 DestInfo.RC:$src0,
9294                                 SrcInfo.KRCWM:$mask)),
9295            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9296                                                 SrcInfo.KRCWM:$mask,
9297                                                 SrcInfo.RC:$src)>;
9298
  // Zero-masking: all-zeros passthru -> "rrkz".
9299  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9300                                 DestInfo.ImmAllZerosV,
9301                                 SrcInfo.KRCWM:$mask)),
9302            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9303                                                  SrcInfo.RC:$src)>;
9304}
9305
// Masked v8i32 -> v8i16 truncates using the 256-bit VPMOV[S|US]DW forms,
// which require VLX.
9306let Predicates = [HasVLX] in {
9307defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9308defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9309defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9310}
9311
// Masked truncates from 512-bit sources to i16 / i8 element vectors, using
// the 512-bit instruction forms (no VLX needed).
9312let Predicates = [HasAVX512] in {
// v16i32 -> v16i16.
9313defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9314defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9315defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9316
// v16i32 -> v16i8.
9317defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9318defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9319defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9320
// v8i64 -> v8i16.
9321defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9322defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9323defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9324}
9325
// Common register and memory forms of a vector extend instruction, both
// defined through AVX512_maskable.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - scheduling class; the memory form uses sched.Folded.
//   DestInfo       - type info of the extended result (also supplies the
//                    execution domain).
//   SrcInfo        - type info of the narrow source register.
//   x86memop       - memory operand of the "rm" form.
//   LdFrag         - extending-load fragment matched by the "rm" form.
//   OpNode         - extend node matched by the "rr" form.
9326multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9327              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9328              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9329  let ExeDomain = DestInfo.ExeDomain in {
  // Register source: OpNode applied to the source register.
9330  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9331                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9332                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9333                  EVEX, Sched<[sched]>;
9334
  // Memory source: the extending load is matched directly via LdFrag.
9335  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9336                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9337                  (DestInfo.VT (LdFrag addr:$src))>,
9338                EVEX, Sched<[sched.Folded]>;
9339  }
9340}
9341
// Byte -> word extend family. All lengths require BWI; the 128- and 256-bit
// variants additionally require VLX.
//   OpNode    - full-vector extend node (used by Z256 and Z).
//   InVecNode - in-register extend node (used by Z128).
//   ExtTy     - prefix used to build the default LdFrag name
//               (ExtTy # "extloadvi8").
// Memory operands are i64mem / i128mem / i256mem for Z128 / Z256 / Z.
9342multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9343          SDNode OpNode, SDNode InVecNode, string ExtTy,
9344          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9345  let Predicates = [HasVLX, HasBWI] in {
9346    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9347                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9348                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9349
9350    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9351                    v16i8x_info, i128mem, LdFrag, OpNode>,
9352                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9353  }
9354  let Predicates = [HasBWI] in {
9355    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9356                    v32i8x_info, i256mem, LdFrag, OpNode>,
9357                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9358  }
9359}
9360
// Byte -> doubleword extend family. The 128- and 256-bit variants require
// VLX; the 512-bit variant only requires AVX512.
//   OpNode    - full-vector extend node (used by Z only).
//   InVecNode - in-register extend node (used by Z128 and Z256, whose source
//               is the v16i8 register class although fewer bytes are read).
//   ExtTy     - prefix used to build the default LdFrag name
//               (ExtTy # "extloadvi8").
// Memory operands are i32mem / i64mem / i128mem for Z128 / Z256 / Z.
9361multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9362          SDNode OpNode, SDNode InVecNode, string ExtTy,
9363          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9364  let Predicates = [HasVLX, HasAVX512] in {
9365    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9366                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9367                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9368
9369    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9370                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9371                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9372  }
9373  let Predicates = [HasAVX512] in {
9374    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9375                   v16i8x_info, i128mem, LdFrag, OpNode>,
9376                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9377  }
9378}
9379
// Byte -> quadword extend family. The 128- and 256-bit variants require VLX;
// the 512-bit variant only requires AVX512.
// Every length uses InVecNode: the source is always the v16i8 register class
// while at most 8 bytes are extended. OpNode is accepted but not referenced.
//   ExtTy - prefix used to build the default LdFrag name
//           (ExtTy # "extloadvi8").
// Memory operands are i16mem / i32mem / i64mem for Z128 / Z256 / Z.
9380multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9381          SDNode OpNode, SDNode InVecNode, string ExtTy,
9382          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9383  let Predicates = [HasVLX, HasAVX512] in {
9384    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9385                   v16i8x_info, i16mem, LdFrag, InVecNode>,
9386                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9387
9388    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9389                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9390                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9391  }
9392  let Predicates = [HasAVX512] in {
9393    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9394                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9395                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9396  }
9397}
9398
// Word -> doubleword extend family. The 128- and 256-bit variants require
// VLX; the 512-bit variant only requires AVX512.
//   OpNode    - full-vector extend node (used by Z256 and Z).
//   InVecNode - in-register extend node (used by Z128).
//   ExtTy     - prefix used to build the default LdFrag name
//               (ExtTy # "extloadvi16").
// Memory operands are i64mem / i128mem / i256mem for Z128 / Z256 / Z.
9399multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9400         SDNode OpNode, SDNode InVecNode, string ExtTy,
9401         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9402  let Predicates = [HasVLX, HasAVX512] in {
9403    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9404                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9405                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9406
9407    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9408                   v8i16x_info, i128mem, LdFrag, OpNode>,
9409                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9410  }
9411  let Predicates = [HasAVX512] in {
9412    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9413                   v16i16x_info, i256mem, LdFrag, OpNode>,
9414                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9415  }
9416}
9417
// Word -> quadword extend family. The 128- and 256-bit variants require VLX;
// the 512-bit variant only requires AVX512.
//   OpNode    - full-vector extend node (used by Z only).
//   InVecNode - in-register extend node (used by Z128 and Z256).
//   ExtTy     - prefix used to build the default LdFrag name
//               (ExtTy # "extloadvi16").
// Memory operands are i32mem / i64mem / i128mem for Z128 / Z256 / Z.
9418multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9419         SDNode OpNode, SDNode InVecNode, string ExtTy,
9420         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9421  let Predicates = [HasVLX, HasAVX512] in {
9422    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9423                   v8i16x_info, i32mem, LdFrag, InVecNode>,
9424                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9425
9426    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9427                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9428                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9429  }
9430  let Predicates = [HasAVX512] in {
9431    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9432                   v8i16x_info, i128mem, LdFrag, OpNode>,
9433                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9434  }
9435}
9436
// Doubleword -> quadword extend family. The 128- and 256-bit variants require
// VLX; the 512-bit variant only requires AVX512.
//   OpNode    - full-vector extend node (used by Z256 and Z).
//   InVecNode - in-register extend node (used by Z128).
//   ExtTy     - prefix used to build the default LdFrag name
//               (ExtTy # "extloadvi32").
// Memory operands are i64mem / i128mem / i256mem for Z128 / Z256 / Z.
// Unlike the byte/word source families above, these definitions do not carry
// VEX_WIG.
9437multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9438         SDNode OpNode, SDNode InVecNode, string ExtTy,
9439         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9440
9441  let Predicates = [HasVLX, HasAVX512] in {
9442    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9443                   v4i32x_info, i64mem, LdFrag, InVecNode>,
9444                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9445
9446    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9447                   v4i32x_info, i128mem, LdFrag, OpNode>,
9448                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9449  }
9450  let Predicates = [HasAVX512] in {
9451    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9452                   v8i32x_info, i256mem, LdFrag, OpNode>,
9453                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9454  }
9455}
9456
// Zero-extend instruction families: zext / zext_invec nodes and the "z"
// extending-load prefix. Argument order is:
//   opcode, mnemonic, OpNode, InVecNode, ExtTy, sched.
9457defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9458defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9459defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9460defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9461defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9462defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9463
// Sign-extend instruction families: sext / sext_invec nodes and the "s"
// extending-load prefix.
9464defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9465defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9466defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9467defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9468defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9469defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9470
9471
9472// Patterns that we also need any extend versions of. aext_vector_inreg
9473// is currently legalized to zext_vector_inreg.
// Folds a full-vector load followed by ExtOp into the memory form of the
// matching extend instruction.
//   OpcPrefix - instruction name prefix; the element-pair and vector-length
//               suffix (e.g. BWZ256rm) is appended to find the record.
//   ExtOp     - extend node applied to the loaded vector.
9474multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9475  // 256-bit patterns
9476  let Predicates = [HasVLX, HasBWI] in {
9477    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9478              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9479  }
9480
9481  let Predicates = [HasVLX] in {
9482    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9483              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9484
9485    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9486              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9487  }
9488
9489  // 512-bit patterns
9490  let Predicates = [HasBWI] in {
9491    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9492              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9493  }
9494  let Predicates = [HasAVX512] in {
9495    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9496              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9497    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9498              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9499
9500    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9501              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9502
9503    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9504              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9505  }
9506}
9507
// Extends AVX512_pmovx_patterns_base with load-folding patterns for the
// in-register extend node. These cover the cases where only part of a 128-bit
// register is extended: the source appears as a scalar load wrapped in
// scalar_to_vector (or an X86vzload) and bitcast to the narrow element type.
//   OpcPrefix - instruction name prefix, as in the base multiclass.
//   ExtOp     - full-vector extend node, forwarded to the base multiclass.
//   InVecOp   - in-register extend node matched by the patterns below.
9508multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9509                                 SDNode InVecOp> :
9510    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9511  // 128-bit patterns
9512  let Predicates = [HasVLX, HasBWI] in {
9513  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9514            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9515  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9516            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9517  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9518            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9519  }
9520  let Predicates = [HasVLX] in {
9521  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9522            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9523  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9524            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9525
  // Byte -> quadword at 128 bits reads only 16 bits, matched here as an
  // i16 any-extending load to i32.
9526  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9527            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9528
9529  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9530            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9531  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9532            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9533  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9534            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9535
9536  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9537            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9538  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9539            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9540
9541  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9542            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9543  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9544            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9545  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9546            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9547  }
  // 256-bit results whose source still occupies only part of a 128-bit
  // register.
9548  let Predicates = [HasVLX] in {
9549  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9550            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9551  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9552            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9553
9554  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9555            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9556  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9557            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9558
9559  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9560            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9561  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9562            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9563  }
9564  // 512-bit patterns
9565  let Predicates = [HasAVX512] in {
9566  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9567            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9568  }
9569}
9570
// Instantiate the load-folding patterns for the sign- and zero-extend
// instruction families.
9571defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9572defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9573
9574// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9575// ext+trunc aggressively making it impossible to legalize the DAG to this
9576// pattern directly.
// The patterns below zero-extend the v16i16 source to v16i32 with VPMOVZXWD
// (register or folded-load form) and then truncate v16i32 -> v16i8 with
// VPMOVDB.
9577let Predicates = [HasAVX512, NoBWI] in {
9578def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9579         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9580def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9581         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9582}
9583
9584//===----------------------------------------------------------------------===//
9585// GATHER - SCATTER Operations
9586
9587// FIXME: Improve scheduling of gather/scatter instructions.
// Defines the single "rm" form of a gather instruction.
//   opc, OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended to
//                    the mnemonic.
//   _              - type info of the gathered data vector.
//   memop          - vector-indexed memory operand.
//   GatherNode     - gather fragment; yields both the data result and the
//                    written-back mask.
//   MaskRC         - mask register class, defaulting to the write-mask class
//                    of the data type.
// Constraints: $dst is early-clobber and tied to the pass-through $src1; the
// input $mask is tied to the $mask_wb output.
9588multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9589                         X86MemOperand memop, PatFrag GatherNode,
9590                         RegisterClass MaskRC = _.KRCWM> {
9591  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9592      ExeDomain = _.ExeDomain in
9593  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9594            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9595            !strconcat(OpcodeStr#_.Suffix,
9596            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9597            [(set _.RC:$dst, MaskRC:$mask_wb,
9598              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
9599                     vectoraddr:$src2))]>, EVEX, EVEX_K,
9600             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
9601}
9602
// Gather families as instantiated in this file with 64-bit element type info
// (avx512vl_f64_info / avx512vl_i64_info); every definition carries VEX_W.
//   dopc / qopc - opcodes of the dword-index ("d") and qword-index ("q")
//                 forms.
//   _           - data type info; info512 / info256 / info128 per length.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended.
//   SUFF        - data-type suffix inserted into the record name.
// Record names are NAME # D|Q # SUFF # Z|Z256|Z128. The 512-bit forms carry
// no explicit predicate here; the 256- and 128-bit forms require VLX.
9603multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9604                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9605  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9606                                      vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9607  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9608                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
9609let Predicates = [HasVLX] in {
9610  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9611                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9612  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9613                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9614  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9615                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9616  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9617                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
9618}
9619}
9620
// Gather families as instantiated in this file with 32-bit element type info
// (avx512vl_f32_info / avx512vl_i32_info); no VEX_W is applied.
//   dopc / qopc - opcodes of the dword-index ("d") and qword-index ("q")
//                 forms.
//   _           - data type info.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended.
//   SUFF        - data-type suffix inserted into the record name.
// For the qword-index forms the data type info is one step narrower than the
// EVEX vector length (info256 with EVEX_V512, info128 with EVEX_V256).
// The 128-bit qword-index form overrides the mask class with VK2WM,
// presumably because only two indices fit in 128 bits while info128 would
// default to a four-element mask - confirm against the ISA reference.
9621multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9622                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9623  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9624                                       mgatherv16i32>, EVEX_V512;
9625  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9626                                       mgatherv8i64>, EVEX_V512;
9627let Predicates = [HasVLX] in {
9628  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9629                                          vy256xmem, mgatherv8i32>, EVEX_V256;
9630  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9631                                          vy128xmem, mgatherv4i64>, EVEX_V256;
9632  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9633                                          vx128xmem, mgatherv4i32>, EVEX_V128;
9634  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9635                                          vx64xmem, mgatherv2i64, VK2WM>,
9636                                          EVEX_V128;
9637}
9638}
9639
9640
// Floating-point gathers: PD (f64 data) and PS (f32 data) families share the
// 0x92 (dword index) / 0x93 (qword index) opcodes.
9641defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9642               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9643
// Integer gathers: Q (i64 data) and D (i32 data) families share the
// 0x90 (dword index) / 0x91 (qword index) opcodes.
9644defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9645                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9646
// Defines the single "mr" form of a scatter instruction.
//   opc, OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended to
//                    the mnemonic.
//   _              - type info of the data vector being stored.
//   memop          - vector-indexed memory operand.
//   ScatterNode    - scatter fragment; its result is the written-back mask.
//   MaskRC         - mask register class, defaulting to the write-mask class
//                    of the data type.
// The input $mask is tied to the $mask_wb output, and the instruction is
// explicitly marked mayStore.
9647multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9648                          X86MemOperand memop, PatFrag ScatterNode,
9649                          RegisterClass MaskRC = _.KRCWM> {
9650
9651let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9652
9653  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9654            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9655            !strconcat(OpcodeStr#_.Suffix,
9656            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9657            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9658                                    MaskRC:$mask,  vectoraddr:$dst))]>,
9659            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9660            Sched<[WriteStore]>;
9661}
9662
// Scatter families as instantiated in this file with 64-bit element type info
// (avx512vl_f64_info / avx512vl_i64_info); every definition carries VEX_W.
// Mirrors avx512_gather_q_pd with the mscatter* fragments.
//   dopc / qopc - opcodes of the dword-index ("d") and qword-index ("q")
//                 forms.
//   _           - data type info; info512 / info256 / info128 per length.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended.
//   SUFF        - data-type suffix inserted into the record name.
// The 256- and 128-bit forms require VLX.
9663multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9664                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9665  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9666                                      vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9667  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9668                                      vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
9669let Predicates = [HasVLX] in {
9670  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9671                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9672  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9673                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9674  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9675                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9676  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9677                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
9678}
9679}
9680
// Scatter variants without VEX_W (instantiated in this file with the f32 and
// i32 type infos). Parameters have the same meaning as in avx512_scatter_q_pd.
// Note that the "q" forms pass a narrower data type info than their EVEX
// vector length: Z uses _.info256 with EVEX_V512, Z256 uses _.info128 with
// EVEX_V256, and Z128 uses _.info128 with an explicit VK2WM mask class.
// The 256-bit and 128-bit forms are guarded by HasVLX.
9681multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9682                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9683  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9684                                       mscatterv16i32>, EVEX_V512;
9685  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9686                                       mscatterv8i64>, EVEX_V512;
9687let Predicates = [HasVLX] in {
9688  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9689                                          vy256xmem, mscatterv8i32>, EVEX_V256;
9690  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9691                                          vy128xmem, mscatterv4i64>, EVEX_V256;
9692  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9693                                          vx128xmem, mscatterv4i32>, EVEX_V128;
9694  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9695                                          vx64xmem, mscatterv2i64, VK2WM>,
9696                                          EVEX_V128;
9697}
9698}
9699
// Floating-point scatters: dopc = 0xA2, qopc = 0xA3.
9700defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9701               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9702
// Integer scatters: dopc = 0xA0, qopc = 0xA1.
9703defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9704                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9705
9706// Gather/scatter prefetch.
// Defines a single instruction, `m`, guarded by HasPFI. It has no outputs,
// takes a mask (KRC) and a memory operand (memop), and has an empty selection
// pattern. It is flagged both mayLoad and mayStore.
//   opc       - opcode byte.
//   F         - instruction format (the MRM<n>m form chosen by the caller).
//   OpcodeStr - full mnemonic.
9707multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9708                       RegisterClass KRC, X86MemOperand memop> {
9709  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9710  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9711            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9712            EVEX, EVEX_K, Sched<[WriteLoad]>;
9713}
9714
// Gather/scatter prefetch instantiations. Across the defms below:
//   - opcode 0xC6 is used by the D-index forms (DPS/DPD) and 0xC7 by the
//     Q-index forms (QPS/QPD);
//   - the format distinguishes the operation: MRM1m = vgatherpf0,
//     MRM2m = vgatherpf1, MRM5m = vscatterpf0, MRM6m = vscatterpf1;
//   - only the DPS forms use a VK16WM mask, all others use VK8WM;
//   - the PD forms additionally carry VEX_W.
9715defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9716                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9717
9718defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9719                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9720
9721defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9722                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9723
9724defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9725                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9726
9727defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9728                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9729
9730defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9731                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9732
9733defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9734                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9735
9736defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9737                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9738
9739defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9740                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9741
9742defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9743                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9744
9745defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9746                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9747
9748defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9749                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9750
9751defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9752                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9753
9754defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9755                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9756
9757defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9758                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9759
9760defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9761                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9762
// Mask-to-vector conversion for one vector width.
// Defines `rr`, which reads a mask register (Vec.KRC) and writes a vector
// register (Vec.RC); it is selected for a sign-extension of the mask to
// Vec.VT. The mnemonic is OpcodeStr with Vec.Suffix appended.
// A second, anonymous pattern maps anyext of the mask to the same instruction.
9763multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9764def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9765                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9766                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9767                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9768
9769// Also need a pattern for anyextend.
9770def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9771          (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
9772}
9773
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit form (Z) requires only `prd`; the 256-bit and 128-bit forms
// (Z256, Z128) additionally require HasVLX.
9774multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9775                                 string OpcodeStr, Predicate prd> {
9776let Predicates = [prd] in
9777  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9778
9779  let Predicates = [prd, HasVLX] in {
9780    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9781    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9782  }
9783}
9784
// Mask-to-vector moves. All four share the "vpmovm2" mnemonic prefix; the
// element suffix comes from the type info inside cvt_by_vec_width.
// The byte/word forms use opcode 0x28 and require HasBWI; the dword/qword
// forms use opcode 0x38 and require HasDQI. The W and Q forms add VEX_W.
9785defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9786defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9787defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9788defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9789
// Vector-to-mask conversion for one vector width.
// Defines `rr`, which reads a vector register (_.RC) and writes a mask
// register (_.KRC). It is selected for (X86pcmpgtm all-zeros, $src), i.e. the
// comparison "0 > element" applied to the source vector.
9790multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9791    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9792                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9793                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9794                        EVEX, Sched<[WriteMove]>;
9795}
9796
9797// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted (INSERT_SUBREG at _.SubRegIdx) into an
// IMPLICIT_DEF value of the wider type ExtendInfo.VT, the instruction named
// Name#"Zrr" is applied to it, and the result is copied to the narrow mask
// register class _.KRC.
9798multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9799                                           X86VectorVTInfo _,
9800                                           string Name> {
9801
9802  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9803            (_.KVT (COPY_TO_REGCLASS
9804                     (!cast<Instruction>(Name#"Zrr")
9805                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9806                                      _.RC:$src, _.SubRegIdx)),
9807                   _.KRC))>;
9808}
9809
// Instantiates the vector-to-mask conversion for all widths of VTInfo.
//   - Z (512-bit) requires `prd`.
//   - Z256 / Z128 require `prd` and HasVLX.
//   - When `prd` holds but VLX is unavailable (NoVLX), the Z256_Alt / Z128_Alt
//     patterns select the 512-bit instruction for the narrower types instead.
9810multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9811                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9812  let Predicates = [prd] in
9813    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9814                                            EVEX_V512;
9815
9816  let Predicates = [prd, HasVLX] in {
9817    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9818                                              EVEX_V256;
9819    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9820                                               EVEX_V128;
9821  }
9822  let Predicates = [prd, NoVLX] in {
9823    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9824    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9825  }
9826}
9827
// Vector-to-mask moves. The byte/word forms use opcode 0x29 and require
// HasBWI; the dword/qword forms use opcode 0x39 and require HasDQI.
// The W and Q forms add VEX_W.
9828defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9829                                              avx512vl_i8_info, HasBWI>;
9830defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9831                                              avx512vl_i16_info, HasBWI>, VEX_W;
9832defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9833                                              avx512vl_i32_info, HasDQI>;
9834defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9835                                              avx512vl_i64_info, HasDQI>, VEX_W;
9836
9837// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9838// is available, but BWI is not. We can't handle this in lowering because
9839// a target independent DAG combine likes to combine sext and trunc.
// Each pattern first produces a v16i32 with VPMOVM2DZrr and then feeds that
// value to VPMOVDBZrr (v16i8 result) or VPMOVDWZrr (v16i16 result). The
// anyext patterns select exactly the same instruction sequence as sext.
9840let Predicates = [HasDQI, NoBWI] in {
9841  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9842            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9843  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9844            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9845
9846  def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9847            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9848  def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9849            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9850}
9851
// 256-bit counterpart of the patterns above for v8i1 -> v8i16, available when
// VLX is also present: produce a v8i32 with VPMOVM2DZ256rr and feed it to
// VPMOVDWZ256rr. sext and anyext select the same sequence.
9852let Predicates = [HasDQI, NoBWI, HasVLX] in {
9853  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9854            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9855
9856  def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9857            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9858}
9859
9860//===----------------------------------------------------------------------===//
9861// AVX-512 - COMPRESS and EXPAND
9862//
9863
// Compress instructions for one vector width.
//   rr  - register form built through AVX512_maskable; it is given null_frag,
//         so no selection pattern is attached here.
//   mr  - unmasked store-to-memory form with an empty pattern list.
//   mrk - masked (EVEX_K) store-to-memory form with an empty pattern list.
// Selection patterns for the masked forms are provided separately by
// compress_by_vec_width_lowering.
9864multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9865                                 string OpcodeStr, X86FoldableSchedWrite sched> {
9866  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9867              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9868              (null_frag)>, AVX5128IBase,
9869              Sched<[sched]>;
9870
  // NOTE: this `let` has no braces, so it applies to `mr` only, not to `mrk`.
9871  let mayStore = 1, hasSideEffects = 0 in
9872  def mr : AVX5128I<opc, MRMDestMem, (outs),
9873              (ins _.MemOp:$dst, _.RC:$src),
9874              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9875              []>, EVEX_CD8<_.EltSize, CD8VT1>,
9876              Sched<[sched.Folded]>;
9877
9878  def mrk : AVX5128I<opc, MRMDestMem, (outs),
9879              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9880              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9881              []>,
9882              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9883              Sched<[sched.Folded]>;
9884}
9885
// Selection patterns for the compress instructions of one vector width.
// Instructions are looked up by name as Name # _.ZSuffix # <form>:
//   X86mCompressingStore            -> mrk  (masked store to memory)
//   X86compress with passthru $src0 -> rrk  (merge-masked register form)
//   X86compress with all-zeros      -> rrkz (zero-masked register form)
9886multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9887  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9888            (!cast<Instruction>(Name#_.ZSuffix##mrk)
9889                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
9890
9891  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9892            (!cast<Instruction>(Name#_.ZSuffix##rrk)
9893                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9894  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9895            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
9896                            _.KRCWM:$mask, _.RC:$src)>;
9897}
9898
// Instantiates the compress instructions and their selection patterns for all
// three vector widths of VTInfo.
//   Pred - required feature predicate; HasAVX512 unless overridden.
// The 512-bit form requires only Pred; the 256/128-bit forms also need HasVLX.
9899multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9900                                 X86FoldableSchedWrite sched,
9901                                 AVX512VLVectorVTInfo VTInfo,
9902                                 Predicate Pred = HasAVX512> {
9903  let Predicates = [Pred] in
9904  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9905           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9906
9907  let Predicates = [Pred, HasVLX] in {
9908    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9909                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9910    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9911                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9912  }
9913}
9914
9915// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer compress uses opcode 0x8B, floating-point compress uses 0x8A; the
// 64-bit element forms add VEX_W. All four are marked NotMemoryFoldable and
// use the default HasAVX512 predicate of compress_by_elt_width.
9916defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
9917                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
9918defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
9919                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
9920defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
9921                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
9922defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
9923                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
9924
9925// Expand instructions for one vector width.
//   rr - register-source form.
//   rm - memory-source form.
// Both are built through AVX512_maskable with null_frag, so no selection
// pattern is attached here; patterns come from expand_by_vec_width_lowering.
9926multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
9927                                 string OpcodeStr, X86FoldableSchedWrite sched> {
9928  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9929              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9930              (null_frag)>, AVX5128IBase,
9931              Sched<[sched]>;
9932
9933  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9934              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
9935              (null_frag)>,
9936            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
9937            Sched<[sched.Folded, sched.ReadAfterFold]>;
9938}
9939
// Selection patterns for the expand instructions of one vector width.
// Instructions are looked up by name as Name # _.ZSuffix # <form>:
//   X86mExpandingLoad, passthru undef     -> rmkz
//   X86mExpandingLoad, passthru all-zeros -> rmkz
//   X86mExpandingLoad, passthru $src0     -> rmk
//   X86expand, passthru $src0             -> rrk
//   X86expand, passthru all-zeros         -> rrkz
9940multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9941
9942  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
9943            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9944                                        _.KRCWM:$mask, addr:$src)>;
9945
9946  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
9947            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9948                                        _.KRCWM:$mask, addr:$src)>;
9949
9950  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
9951                                               (_.VT _.RC:$src0))),
9952            (!cast<Instruction>(Name#_.ZSuffix##rmk)
9953                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
9954
9955  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9956            (!cast<Instruction>(Name#_.ZSuffix##rrk)
9957                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9958  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9959            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
9960                            _.KRCWM:$mask, _.RC:$src)>;
9961}
9962
// Instantiates the expand instructions and their selection patterns for all
// three vector widths of VTInfo.
//   Pred - required feature predicate; HasAVX512 unless overridden.
// The 512-bit form requires only Pred; the 256/128-bit forms also need HasVLX.
9963multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
9964                               X86FoldableSchedWrite sched,
9965                               AVX512VLVectorVTInfo VTInfo,
9966                               Predicate Pred = HasAVX512> {
9967  let Predicates = [Pred] in
9968  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
9969           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9970
9971  let Predicates = [Pred, HasVLX] in {
9972    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
9973                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9974    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
9975                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9976  }
9977}
9978
9979// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer expand uses opcode 0x89, floating-point expand uses 0x88; the
// 64-bit element forms add VEX_W. All four use the default HasAVX512
// predicate of expand_by_elt_width.
9980defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
9981                                      avx512vl_i32_info>, EVEX;
9982defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
9983                                      avx512vl_i64_info>, EVEX, VEX_W;
9984defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
9985                                      avx512vl_f32_info>, EVEX;
9986defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
9987                                      avx512vl_f64_info>, EVEX, VEX_W;
9988
9989// Handle instruction  reg_vec1 = op(reg_vec,imm)
9990//                                op(mem_vec,imm)
9991//                                op(broadcast(eltVt),imm)
9992// All instructions are created with FROUND_CURRENT.
// Defines three maskable forms of a unary packed FP operation with an
// immediate: rri (register source), rmi (full-vector memory source) and rmbi
// (broadcast scalar memory source, EVEX_B). The mnemonic is OpcodeStr with
// _.Suffix appended. All three use MXCSR and may raise FP exceptions.
9993multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9994                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9995  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9996  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9997                      (ins _.RC:$src1, i32u8imm:$src2),
9998                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
9999                      (OpNode (_.VT _.RC:$src1),
10000                              (i32 timm:$src2))>, Sched<[sched]>;
10001  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10002                    (ins _.MemOp:$src1, i32u8imm:$src2),
10003                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10004                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10005                            (i32 timm:$src2))>,
10006                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10007  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10008                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10009                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10010                    "${src1}"##_.BroadcastStr##", $src2",
10011                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10012                            (i32 timm:$src2))>, EVEX_B,
10013                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10014  }
10015}
10016
10017// Handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
// Defines the single register-source form `rrib` of a unary packed FP
// operation with an immediate, printed with "{sae}" and encoded with EVEX_B.
// Unlike the non-SAE forms, only Uses = [MXCSR] is set here; the
// mayRaiseFPException flag is not.
10018multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10019                                          SDNode OpNode, X86FoldableSchedWrite sched,
10020                                          X86VectorVTInfo _> {
10021  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10022  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10023                      (ins _.RC:$src1, i32u8imm:$src2),
10024                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10025                      "$src1, {sae}, $src2",
10026                      (OpNode (_.VT _.RC:$src1),
10027                              (i32 timm:$src2))>,
10028                      EVEX_B, Sched<[sched]>;
10029}
10030
// Instantiates a unary packed FP-with-immediate operation for all widths.
//   OpNode    - node matched by the regular (rri/rmi/rmbi) forms.
//   OpNodeSAE - node matched by the {sae} form.
// The 512-bit record (Z) combines the regular and the {sae} forms and needs
// only `prd`. The 128/256-bit records get the regular forms only and also
// require HasVLX.
10031multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10032            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10033            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10034  let Predicates = [prd] in {
10035    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10036                                           _.info512>,
10037                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10038                                               sched.ZMM, _.info512>, EVEX_V512;
10039  }
10040  let Predicates = [prd, HasVLX] in {
10041    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10042                                           _.info128>, EVEX_V128;
10043    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10044                                           _.info256>, EVEX_V256;
10045  }
10046}
10047
10048// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10049//                                op(reg_vec2,mem_vec,imm)
10050//                                op(reg_vec2,broadcast(eltVt),imm)
10051// All instructions are created with FROUND_CURRENT.
// Defines three maskable forms of a binary packed FP operation with an
// immediate: rri (register second source), rmi (full-vector memory second
// source) and rmbi (broadcast scalar memory second source, EVEX_B).
// OpcodeStr is used as-is (no suffix is appended). All three use MXCSR and
// may raise FP exceptions.
10052multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10053                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10054  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10055  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10056                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10057                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10058                      (OpNode (_.VT _.RC:$src1),
10059                              (_.VT _.RC:$src2),
10060                              (i32 timm:$src3))>,
10061                      Sched<[sched]>;
10062  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10063                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10064                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10065                    (OpNode (_.VT _.RC:$src1),
10066                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10067                            (i32 timm:$src3))>,
10068                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10069  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10070                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10071                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10072                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
10073                    (OpNode (_.VT _.RC:$src1),
10074                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10075                            (i32 timm:$src3))>, EVEX_B,
10076                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10077  }
10078}
10079
10080// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10081//                                op(reg_vec2,mem_vec,imm)
// Defines the rri and rmi forms of a two-source operation with an 8-bit
// immediate. The source and destination vector types may differ: sources use
// SrcInfo while the result and masking use DestInfo. Unlike the FP multiclasses
// in this group, no MXCSR use or FP-exception flag is set, and the immediate
// is matched as i8 rather than i32.
10082multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10083                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10084                              X86VectorVTInfo SrcInfo>{
10085  let ExeDomain = DestInfo.ExeDomain in {
10086  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10087                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10088                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10089                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10090                               (SrcInfo.VT SrcInfo.RC:$src2),
10091                               (i8 timm:$src3)))>,
10092                  Sched<[sched]>;
10093  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10094                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10095                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10096                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10097                             (SrcInfo.VT (bitconvert
10098                                                (SrcInfo.LdFrag addr:$src2))),
10099                             (i8 timm:$src3)))>,
10100                Sched<[sched.Folded, sched.ReadAfterFold]>;
10101  }
10102}
10103
10104// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10105//                                op(reg_vec2,mem_vec,imm)
10106//                                op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri and rmi forms from avx512_3Op_rm_imm8 (with the same type
// info used for both source and destination) and adds the broadcast form
// rmbi (EVEX_B).
10107multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10108                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10109  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10110
10111  let ExeDomain = _.ExeDomain in
10112  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10113                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10114                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10115                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
10116                    (OpNode (_.VT _.RC:$src1),
10117                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10118                            (i8 timm:$src3))>, EVEX_B,
10119                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10120}
10121
10122// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10123//                                       op(reg_vec2,mem_scalar,imm)
// Defines the rri and rmi forms through AVX512_maskable_scalar. The memory
// form takes an _.IntScalarMemOp operand and matches it with
// _.ScalarIntMemCPat. Both forms use MXCSR and may raise FP exceptions.
10124multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10125                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10126  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10127  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10128                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10129                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10130                      (OpNode (_.VT _.RC:$src1),
10131                              (_.VT _.RC:$src2),
10132                              (i32 timm:$src3))>,
10133                      Sched<[sched]>;
10134  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10135                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10136                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10137                    (OpNode (_.VT _.RC:$src1),
10138                            (_.VT _.ScalarIntMemCPat:$src2),
10139                            (i32 timm:$src3))>,
10140                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10141  }
10142}
10143
10144// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Defines the single register form `rrib` of a binary packed FP operation
// with an immediate, printed with "{sae}" and encoded with EVEX_B. Only
// Uses = [MXCSR] is set; mayRaiseFPException is not.
10145multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10146                                    SDNode OpNode, X86FoldableSchedWrite sched,
10147                                    X86VectorVTInfo _> {
10148  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10149  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10150                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10151                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10152                      "$src1, $src2, {sae}, $src3",
10153                      (OpNode (_.VT _.RC:$src1),
10154                              (_.VT _.RC:$src2),
10155                              (i32 timm:$src3))>,
10156                      EVEX_B, Sched<[sched]>;
10157}
10158
10159// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of avx512_fp_sae_packed_imm, built through
// AVX512_maskable_scalar. Note that the record is spelled NAME#rrib here,
// whereas the packed variant uses plain `rrib`.
10160multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10161                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10162  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10163  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10164                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10165                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10166                      "$src1, $src2, {sae}, $src3",
10167                      (OpNode (_.VT _.RC:$src1),
10168                              (_.VT _.RC:$src2),
10169                              (i32 timm:$src3))>,
10170                      EVEX_B, Sched<[sched]>;
10171}
10172
// Instantiates a binary packed FP-with-immediate operation for all widths.
//   OpNode    - node matched by the regular (rri/rmi/rmbi) forms.
//   OpNodeSAE - node matched by the {sae} form.
// The 512-bit record (Z) combines the regular and the {sae} forms and needs
// only `prd`. The 128/256-bit records get the regular forms only and also
// require HasVLX.
10173multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10174            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10175            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10176  let Predicates = [prd] in {
10177    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10178                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10179                                  EVEX_V512;
10180
10181  }
10182  let Predicates = [prd, HasVLX] in {
10183    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10184                                  EVEX_V128;
10185    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10186                                  EVEX_V256;
10187  }
10188}
10189
// Instantiates avx512_3Op_rm_imm8 (rri/rmi forms, distinct source and
// destination types) for all three vector widths.
//   Pred - required feature predicate; HasBWI unless overridden.
// Every record is tagged AVX512AIi8Base and EVEX_4V. The 128/256-bit records
// additionally require HasVLX.
10190multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10191                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10192                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10193  let Predicates = [Pred] in {
10194    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10195                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10196  }
10197  let Predicates = [Pred, HasVLX] in {
10198    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10199                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10200    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10201                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10202  }
10203}
10204
// Instantiates avx512_3Op_imm8 (rri/rmi/rmbi forms) for all three vector
// widths.
//   Pred - required feature predicate; HasAVX512 unless overridden.
// Only the EVEX vector-length class is attached here; any other encoding
// classes must be supplied at the defm site. The 128/256-bit records
// additionally require HasVLX.
10205multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10206                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10207                                  Predicate Pred = HasAVX512> {
10208  let Predicates = [Pred] in {
10209    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10210                                EVEX_V512;
10211  }
10212  let Predicates = [Pred, HasVLX] in {
10213    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10214                                EVEX_V128;
10215    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10216                                EVEX_V256;
10217  }
10218}
10219
// Scalar FP-with-immediate operation: a single record Z, guarded by `prd`,
// that combines the regular forms (avx512_fp_scalar_imm, matched by OpNode)
// with the {sae} form (avx512_fp_sae_scalar_imm, matched by OpNodeSAE).
// Both use the XMM scheduling class of `sched`.
10220multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10221                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10222                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10223  let Predicates = [prd] in {
10224     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10225              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10226  }
10227}
10228
// Instantiates a unary packed FP-with-immediate operation for both f32 (PS)
// and f64 (PD) element types.
//   opcPs / opcPd - opcode bytes for the PS and PD variants.
// PS uses EVEX_CD8<32, CD8VF>; PD uses EVEX_CD8<64, CD8VF> plus VEX_W.
10229multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10230                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10231                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10232  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10233                            opcPs, OpNode, OpNodeSAE, sched, prd>,
10234                            EVEX_CD8<32, CD8VF>;
10235  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10236                            opcPd, OpNode, OpNodeSAE, sched, prd>,
10237                            EVEX_CD8<64, CD8VF>, VEX_W;
10238}
10239
// Unary packed FP operations with an immediate, PS and PD variants.
// VREDUCE requires HasDQI; VRNDSCALE and VGETMANT require HasAVX512.
// VREDUCE and VGETMANT use the same opcode byte for PS and PD, whereas
// VRNDSCALE uses 0x08 for PS and 0x09 for PD.
10240defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10241                              X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10242                              AVX512AIi8Base, EVEX;
10243defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10244                              X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10245                              AVX512AIi8Base, EVEX;
10246defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10247                              X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10248                              AVX512AIi8Base, EVEX;
10249
// Packed VRANGE (DQI), f64 and f32 element variants.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<
                    "vrangepd", avx512vl_f64_info, 0x50, X86VRange,
                    X86VRangeSAE, SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<
                    "vrangeps", avx512vl_f32_info, 0x50, X86VRange,
                    X86VRangeSAE, SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10258
// Scalar VRANGE (DQI).
defm VRANGESD : avx512_common_fp_sae_scalar_imm<
                    "vrangesd", f64x_info, 0x51, X86Ranges, X86RangesSAE,
                    SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS : avx512_common_fp_sae_scalar_imm<
                    "vrangess", f32x_info, 0x51, X86Ranges, X86RangesSAE,
                    SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VREDUCE (DQI).
defm VREDUCESD : avx512_common_fp_sae_scalar_imm<
                     "vreducesd", f64x_info, 0x57, X86Reduces, X86ReducesSAE,
                     SchedWriteFRnd, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS : avx512_common_fp_sae_scalar_imm<
                     "vreducess", f32x_info, 0x57, X86Reduces, X86ReducesSAE,
                     SchedWriteFRnd, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VGETMANT (AVX512).
defm VGETMANTSD : avx512_common_fp_sae_scalar_imm<
                      "vgetmantsd", f64x_info, 0x27, X86GetMants,
                      X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS : avx512_common_fp_sae_scalar_imm<
                      "vgetmantss", f32x_info, 0x27, X86GetMants,
                      X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10279
// Maskable register (rri), full-vector memory (rmi) and broadcast memory
// (rmbi) forms of a shuffle matched through X86Shuf128.  The shuffle node is
// typed with CastInfo.VT and its result is bitconverted to _.VT.  The rri and
// rmi forms carry an EVEX2VEXOverride built from EVEX2VEXOvrd plus "rr"/"rm".
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
    // Register source.
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                   OpcodeStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (_.VT (bitconvert
                          (CastInfo.VT
                           (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                       (i8 timm:$src3)))))>,
               Sched<[sched]>,
               EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;

    // Full-width memory source, loaded with CastInfo.LdFrag.
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                   OpcodeStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (_.VT (bitconvert
                          (CastInfo.VT
                           (X86Shuf128 _.RC:$src1,
                                       (CastInfo.LdFrag addr:$src2),
                                       (i8 timm:$src3)))))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>,
               EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

    // Broadcast memory source, loaded with _.BroadcastLdFrag.
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr,
                    "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT (bitconvert
                           (CastInfo.VT
                            (X86Shuf128 _.RC:$src1,
                                        (_.BroadcastLdFrag addr:$src2),
                                        (i8 timm:$src3)))))>,
                EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10316
// Instantiates avx512_shuff_packed_128_common at 512 bits (Z) and 256 bits
// (Z256).  No 128-bit form is defined.  The 512-bit form passes an empty
// EVEX2VEXOvrd string; only the 256-bit form forwards the caller's value.
multiclass avx512_shuff_packed_128<string OpcodeStr,
                                   X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo,
                                   bits<8> opc, string EVEX2VEXOvrd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, _.info512,
                                            CastInfo.info512, "">,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                               _.info256, CastInfo.info256,
                                               EVEX2VEXOvrd>,
                EVEX_V256;
  }
}
10330
// FP variants (opcode 0x23).  The 32-bit element form is matched through the
// 64-bit element cast type.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
                                          avx512vl_f32_info, avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
                                          avx512vl_f64_info, avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Integer variants (opcode 0x43).
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
                                          avx512vl_i32_info, avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
                                          avx512vl_i64_info, avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10339
let Predicates = [HasAVX512] in {
  // Register-source fallbacks for broadcasting a 128-bit subvector to 512
  // bits.  They are needed when the load node used by the broadcast patterns
  // has additional users, which prevents those patterns from being selected.
  // The xmm source is inserted into an undefined zmm and supplied as both
  // shuffle operands, with an immediate of 0.

  // 64-bit element types.
  def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
            (VSHUFF64X2Zrri
                (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;
  def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
            (VSHUFI64X2Zrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;

  // 32-bit element types.
  def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
            (VSHUFF32X4Zrri
                (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;
  def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
            (VSHUFI32X4Zrri
                (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;

  // 16-bit and 8-bit element types reuse the 32-bit integer shuffle.
  def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
            (VSHUFI32X4Zrri
                (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;

  def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
            (VSHUFI32X4Zrri
                (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;
}
10372
// Maskable register (rri), full-vector memory (rmi) and broadcast memory
// (rmbi) forms of an instruction matched through X86VAlign.
//
// NOTE: The rri/rmi forms set EVEX2VEXOverride to the VPALIGNR names here.
// For the 256-bit instantiation the override is changed back to unset at the
// point where this multiclass is instantiated.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register source.
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                   OpcodeStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2,
                                    (i8 timm:$src3)))>,
               Sched<[sched]>,
               EVEX2VEXOverride<"VPALIGNRrri">;

    // Full-width memory source.
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                   OpcodeStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (_.VT (X86VAlign _.RC:$src1,
                                    (bitconvert (_.LdFrag addr:$src2)),
                                    (i8 timm:$src3)))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>,
               EVEX2VEXOverride<"VPALIGNRrmi">;

    // Broadcast memory source.
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr,
                    "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (X86VAlign _.RC:$src1,
                               (_.VT (_.BroadcastLdFrag addr:$src2)),
                               (i8 timm:$src3))>,
                EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10402
// Instantiates avx512_valign (opcode 0x03) at 512, 128 and 256 bits.  The
// 128/256-bit forms require HasVLX in addition to HasAVX512.
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
           AVX512AIi8Base, EVEX_4V, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;

    // The 256-bit version can't really be overridden, so reset the
    // EVEX2VEXOverride that avx512_valign sets back to unset.
    let EVEX2VEXOverride = ? in {
      defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                  AVX512AIi8Base, EVEX_4V, EVEX_V256;
    }
  }
}
10418
// VALIGND / VALIGNQ: 32-bit and 64-bit element variants.
defm VALIGND : avx512_valign_common<"valignd", SchedWriteShuffle,
                                    avx512vl_i32_info>,
               EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign_common<"valignq", SchedWriteShuffle,
                                    avx512vl_i64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;

// VPALIGNR: byte-element form, matched through X86PAlignr.
defm VPALIGNR : avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                          SchedWriteShuffle,
                                          avx512vl_i8_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
10428
// Immediate transforms used when a valignq is re-expressed as a masked
// valignd, or when a valignq/valignd is re-expressed as a vpalignr.  Each one
// scales the original immediate by a constant factor.

// valignq immediate -> valignd immediate (x2).
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;

// valignq immediate -> vpalignr immediate (x8).
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;

// valignd immediate -> vpalignr immediate (x4).
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
10440
// Selects a masked instruction operating on To.VT for a vselect whose value
// operand is an OpNode performed in From.VT and bitconverted to To.VT.  The
// immediate is rewritten with ImmXForm.  Four cases are covered: register or
// memory second source, each with merge masking ($src0 passthru) or zero
// masking (To.ImmAllZerosV passthru).
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From,
                                        X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // Register source, merge masking.
  def : Pat<(To.VT
             (vselect To.KRCWM:$mask,
                      (bitconvert
                       (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                        timm:$src3))),
                      To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2,
                (ImmXForm timm:$src3))>;

  // Register source, zero masking.
  def : Pat<(To.VT
             (vselect To.KRCWM:$mask,
                      (bitconvert
                       (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                        timm:$src3))),
                      To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz")
                To.KRCWM:$mask, To.RC:$src1, To.RC:$src2,
                (ImmXForm timm:$src3))>;

  // Memory source, merge masking.
  def : Pat<(To.VT
             (vselect To.KRCWM:$mask,
                      (bitconvert
                       (From.VT (OpNode From.RC:$src1,
                                        (From.LdFrag addr:$src2),
                                        timm:$src3))),
                      To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm timm:$src3))>;

  // Memory source, zero masking.
  def : Pat<(To.VT
             (vselect To.KRCWM:$mask,
                      (bitconvert
                       (From.VT (OpNode From.RC:$src1,
                                        (From.LdFrag addr:$src2),
                                        timm:$src3))),
                      To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz")
                To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm timm:$src3))>;
}
10482
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// To.VT broadcast load bitconverted to From.VT: an unmasked form plus
// merge-masked and zero-masked forms.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm>
    : avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Broadcast source, unmasked.
  def : Pat<(From.VT
             (OpNode From.RC:$src1,
                     (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                     timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi")
                To.RC:$src1, addr:$src2, (ImmXForm timm:$src3))>;

  // Broadcast source, merge masking.
  def : Pat<(To.VT
             (vselect To.KRCWM:$mask,
                      (bitconvert
                       (From.VT
                        (OpNode From.RC:$src1,
                                (bitconvert
                                 (To.VT (To.BroadcastLdFrag addr:$src2))),
                                timm:$src3))),
                      To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm timm:$src3))>;

  // Broadcast source, zero masking.
  def : Pat<(To.VT
             (vselect To.KRCWM:$mask,
                      (bitconvert
                       (From.VT
                        (OpNode From.RC:$src1,
                                (bitconvert
                                 (To.VT (To.BroadcastLdFrag addr:$src2))),
                                timm:$src3))),
                      To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz")
                To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm timm:$src3))>;
}
10516
// 512-bit: lowering picks the widest element type it can, so the only
// conversion needed here is valignq -> valignd.
let Predicates = [HasAVX512] in
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                       v16i32_info, ValignqImm32XForm>;
10523
// 128-bit and 256-bit: as with 512-bit, lowering picks the widest element
// type it can, so only the valignq -> valignd conversion is needed.
let Predicates = [HasVLX] in {
  // 128-bit.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign,
                                         v2i64x_info, v4i32x_info,
                                         ValignqImm32XForm>;
  // 256-bit.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign,
                                         v4i64x_info, v8i32x_info,
                                         ValignqImm32XForm>;
}
10534
// With VLX and BWI, masked 128-bit VALIGNQ/VALIGND can be selected as
// VPALIGNRZ128; the immediate is rescaled by the matching XForm.
let Predicates = [HasVLX, HasBWI] in {
  // From valignq.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v2i64x_info, v16i8x_info,
                                      ValignqImm8XForm>;
  // From valignd.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v4i32x_info, v16i8x_info,
                                      ValigndImm8XForm>;
}
10542
// VDBPSADBW: i16 destination info, i8 source info; marked as not convertible
// from EVEX to VEX.
defm VDBPSADBW : avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                           SchedWritePSADBW,
                                           avx512vl_i16_info,
                                           avx512vl_i8_info>,
                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10546
// Maskable single-source operation: a register form (rr) and a full-vector
// memory form (rm), both matched through OpNode on _.VT.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register source.
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1),
                  OpcodeStr, "$src1", "$src1",
                  (_.VT (OpNode (_.VT _.RC:$src1)))>,
              EVEX, AVX5128IBase,
              Sched<[sched]>;

    // Memory source.
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1),
                  OpcodeStr, "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded]>;
  }
}
10564
// avx512_unary_rm plus a broadcast-memory form (rmb) whose source is loaded
// through _.BroadcastLdFrag and encoded with EVEX_B.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _>
    : avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src1),
                 OpcodeStr,
                 "${src1}"##_.BroadcastStr,
                 "${src1}"##_.BroadcastStr,
                 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
10576
// Instantiates avx512_unary_rm at 512, 256 and 128 bits.  The 512-bit form is
// guarded by prd; the narrower forms additionally require HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                             VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                VTInfo.info128>,
                EVEX_V128;
  }
}
10591
// Instantiates avx512_unary_rmb (register, memory and broadcast forms) at
// 512, 256 and 128 bits.  The 512-bit form is guarded by prd; the narrower
// forms additionally require HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                 VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                 VTInfo.info128>,
                EVEX_V128;
  }
}
10606
// Quadword (Q, mnemonic suffix "q", VEX_W) and doubleword (D, suffix "d")
// variants of a unary operation, each including broadcast forms.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>,
           VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
10615
// Word (W, mnemonic suffix "w") and byte (B, suffix "b") variants of a unary
// operation.  These use avx512_unary_rm_vl, so no broadcast forms are
// defined; both are VEX_WIG.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w,
                                 string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>,
           VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>,
           VEX_WIG;
}
10624
// All four integer element sizes of a unary operation: D/Q forms are guarded
// by HasAVX512, B/W forms by HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}
10634
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS without VLX: implement the 256-bit and 128-bit 64-bit-element abs
// with the 512-bit instruction by widening the source into an undefined zmm
// and extracting the low subregister of the result.
let Predicates = [HasAVX512, NoVLX] in {
  // 256-bit.
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
                                   VR256X:$src, sub_ymm)),
                sub_ymm)>;
  // 128-bit.
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
                                   VR128X:$src, sub_xmm)),
                sub_xmm)>;
}
10651
// When VLX is unavailable, implement the 128-bit and 256-bit forms of a unary
// operation with the 512-bit register instruction (InstrStr # "Zrr"): insert
// the narrow source into an undefined 512-bit value, run the wide
// instruction, then extract the matching subregister.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit.
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                  (!cast<Instruction>(InstrStr # "Zrr")
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                     _.info256.RC:$src1,
                                     _.info256.SubRegIdx)),
                  _.info256.SubRegIdx)>;

    // 128-bit.
    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                  (!cast<Instruction>(InstrStr # "Zrr")
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                     _.info128.RC:$src1,
                                     _.info128.SubRegIdx)),
                  _.info128.SubRegIdx)>;
  }
}
10673
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict",
                                        X86Conflict, SchedWriteVecALU,
                                        HasCDI>;

// VPLZCNT: without VLX, the 128/256-bit forms go through the 512-bit
// instruction.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10684
10685//===---------------------------------------------------------------------===//
10686// Counts number of ones - VPOPCNTD and VPOPCNTQ
10687//===---------------------------------------------------------------------===//
10688
// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// Without VLX, the 128/256-bit forms go through the 512-bit instruction.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info,
                             HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info,
                             HasVPOPCNTDQ>;
10695
10696//===---------------------------------------------------------------------===//
10697// Replicate Single FP - MOVSHDUP and MOVSLDUP
10698//===---------------------------------------------------------------------===//
10699
// f32 unary operation at all three vector widths (register and memory forms),
// guarded by HasAVX512 and using the XS prefix.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                 avx512vl_f32_info, HasAVX512>,
              XS;
}
10705
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;
10710
10711//===----------------------------------------------------------------------===//
10712// AVX-512 - MOVDDUP
10713//===----------------------------------------------------------------------===//
10714
// 128-bit MOVDDUP forms.  Unlike the wider forms these are matched as a
// broadcast: the register form through X86VBroadcast and the memory form
// through _.BroadcastLdFrag on a scalar memory operand.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register source.
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src),
                  OpcodeStr, "$src", "$src",
                  (_.VT (X86VBroadcast (_.VT _.RC:$src)))>,
              EVEX,
              Sched<[sched]>;

    // Scalar memory source.
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src),
                  OpcodeStr, "$src", "$src",
                  (_.VT (_.BroadcastLdFrag addr:$src))>,
              EVEX, EVEX_CD8<_.EltSize, CD8VH>,
              Sched<[sched.Folded]>;
  }
}
10729
// Instantiates MOVDDUP at 512, 256 and 128 bits.
//
// The 512-bit and 256-bit forms are ordinary unary operations matched through
// OpNode; the 128-bit form uses avx512_movddup_128, which is matched as a
// broadcast instead and therefore does not take OpNode.
//
// The 512-bit form is explicitly guarded by HasAVX512, in line with the other
// *_vl multiclasses in this file, so the instruction is not left without a
// subtarget predicate.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10742
// MOVDDUP for f64 vectors: forwards to avx512_movddup_common with
// avx512vl_f64_info and adds the XD prefix and VEX_W.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME : avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                    avx512vl_f64_info>,
              XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup,
                               SchedWriteFShuffle>;
10750
// Additional selection patterns that map v2f64 broadcasts onto the 128-bit
// VMOVDDUP instructions.
let Predicates = [HasVLX] in {
  // Unmasked: scalar register, simple vector load, and zero-extending
  // 64-bit load sources.
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
  def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
            (VMOVDDUPZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
            (VMOVDDUPZ128rm addr:$src)>;

  // Masked, scalar register source (merge, then zero masking).
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast f64:$src)),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rrk
                VR128X:$src0, VK2WM:$mask,
                (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast f64:$src)),
                     immAllZerosV),
            (VMOVDDUPZ128rrkz
                VK2WM:$mask,
                (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

  // Masked, 64-bit broadcast-load source (merge, then zero masking).
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcastld64 addr:$src)),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcastld64 addr:$src)),
                     immAllZerosV),
            (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

  // Masked, broadcast of a simple vector load (merge, then zero masking).
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
                     immAllZerosV),
            (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
10781
10782//===----------------------------------------------------------------------===//
10783// AVX-512 - Unpack Instructions
10784//===----------------------------------------------------------------------===//
10785
// FP unpacks reuse avx512_fp_binop_p, so clear the implicit register uses and
// the FP-exception flag for them.
let Uses = []<Register>, mayRaiseFPException = 0 in {
  defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                   SchedWriteFShuffleSizes, 0, 1>;
  defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                   SchedWriteFShuffleSizes>;
}
10792
// Byte and word unpacks (BWI).
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

// Doubleword and quadword unpacks (AVX512).
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
10810
10811//===----------------------------------------------------------------------===//
10812// AVX-512 - Extract & Insert Integer Instructions
10813//===----------------------------------------------------------------------===//
10814
// Memory-destination form shared by the byte and word element extracts.
//   opc       - opcode byte of the instruction.
//   OpcodeStr - mnemonic.
//   OpNode    - extract node; its result is truncated to the element type
//               before being stored.
//   _         - vector type info of the source register.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1),
                                                     imm:$src2))),
                             addr:$dst)]>,
           EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
10824
// Byte element extract (opcode 0x14), gated on HasBWI.
//   rr - extracts into a 32- or 64-bit GPR, selected from X86pextrb.
//   mr - store form, provided by avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
10837
// Word element extract, gated on HasBWI.
//   rr     - opcode 0xC5 (MRMSrcReg), selected from X86pextrw.
//   rr_REV - opcode 0x15 (MRMDestReg) register form of the same operation;
//            it has no selection pattern and is marked codegen-only with
//            ForceDisassemble set.
//   mr     - opcode 0x15 store form, provided by avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
      def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                             (ins _.RC:$src1, u8imm:$src2),
                             !strconcat(OpcodeStr,
                                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                             []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;
    }

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
10857
// Dword/qword element extract (opcode 0x16), gated on HasDQI.
//   GRC - general-purpose register class that receives the element.
//   rr  - extractelt into a GPR.
//   mr  - extractelt stored directly to memory.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(store (extractelt (_.VT _.RC:$src1), imm:$src2),
                               addr:$dst)]>,
             EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
             Sched<[WriteVecExtractSt]>;
  }
}
10877
// EVEX element extracts from 128-bit vectors: byte, word, dword and qword.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>,
                VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>,
                VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>,
                VEX_W;
10882
// Memory-source form shared by all element inserts: the scalar loaded through
// LdFrag is inserted into $src1 at the position given by the immediate.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2),
                                         imm:$src3)))]>,
           EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
10892
// Byte/word element insert, gated on HasBWI.
//   rr - inserts from a 32- or 64-bit GPR.
//   rm - inserts from memory (see avx512_insert_elt_m), loading via LdFrag.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2,
                                     imm:$src3))]>,
             EVEX_4V, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
10906
// Dword/qword element insert, gated on HasDQI.
//   GRC - general-purpose register class supplying the element.
//   rr  - insertelt from a GPR.
//   rm  - insertelt from memory (see avx512_insert_elt_m), loading via the
//         vector type's scalar load fragment.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
                                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2,
                                              imm:$src3)))]>,
             EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>,
                TAPD;
  }
}
10921
// EVEX element inserts into 128-bit vectors: byte, word, dword and qword.
// The byte and word memory forms load through extending loads.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>,
                TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>,
                PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>,
                VEX_W;
10928
10929//===----------------------------------------------------------------------===//
10930// VSHUFPS - VSHUFPD Operations
10931//===----------------------------------------------------------------------===//
10932
// VSHUFPS/VSHUFPD (opcode 0xC6) for all vector lengths, built on the common
// three-operand-plus-imm8 multiclass using the floating-point type info.
// Note: VTInfo_I is accepted but not referenced by this body.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                     SchedWriteFShuffle>,
              EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
              AVX512AIi8Base, EVEX_4V;
}
10940
// Single- and double-precision shuffles.
defm VSHUFPS : avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>,
               PS;
defm VSHUFPD : avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>,
               PD, VEX_W;
10943
10944//===----------------------------------------------------------------------===//
10945// AVX-512 - Byte shift Left/Right
10946//===----------------------------------------------------------------------===//
10947
// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
//
// Shift of a vector by an 8-bit immediate, in register-source (rr) and
// memory-source (rm) forms.
//   MRMr / MRMm - instruction formats for the register and memory forms.
//   sched       - scheduling class; the rm form uses its folded variant.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : AVX512<opc, MRMr, (outs _.RC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
           Sched<[sched]>;

  def rm : AVX512<opc, MRMm, (outs _.RC:$dst),
                  (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode (_.VT (bitconvert
                                             (_.LdFrag addr:$src1))),
                                      (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10965
// Instantiates avx512_shift_packed on byte vectors for every vector length:
// 512-bit under 'prd' alone, 256- and 128-bit additionally under HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>,
                EVEX_V128;
  }
}
// Byte shifts left/right.  Both share opcode 0x73 and are told apart by the
// ModRM format (MRM7 for the left shift, MRM3 for the right shift).
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m,
                                       "vpslldq", SchedWriteShuffle, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m,
                                       "vpsrldq", SchedWriteShuffle, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
10985
// Two-source operation whose result vector type (_dst) differs from its
// source vector type (_src).  The rr form is commutable; the rm form folds a
// load of the second source.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in {
    def rr : AVX512BI<opc, MRMSrcReg, (outs _dst.RC:$dst),
                      (ins _src.RC:$src1, _src.RC:$src2),
                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dst.RC:$dst,
                            (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                             (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  }

  def rm : AVX512BI<opc, MRMSrcMem, (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT (bitconvert
                                                     (_src.LdFrag
                                                      addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11006
// Instantiates avx512_psadbw_packed for every vector length, pairing an i64
// result vector with an i8 source vector of the same total width.  The
// 512-bit form needs only 'prd'; the 256/128-bit forms also need HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>,
                EVEX_V128;
  }
}
11020
// VPSADBW (opcode 0xf6) at all vector lengths, gated on HasBWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>,
               EVEX_4V, VEX_WIG;
11023
11024// Transforms to swizzle an immediate to enable better matching when
11025// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate for operand 0 and operand 2 trading places.
  // Immediate bits 1 <-> 4 and 3 <-> 6 are exchanged; bits 0, 2, 5 and 7
  // (mask 0xa5) are left where they are.
  const uint8_t OldImm = N->getZExtValue();
  uint8_t Swizzled = OldImm & 0xa5;
  Swizzled |= (OldImm & 0x02) << 3; // bit 1 -> bit 4
  Swizzled |= (OldImm & 0x10) >> 3; // bit 4 -> bit 1
  Swizzled |= (OldImm & 0x08) << 3; // bit 3 -> bit 6
  Swizzled |= (OldImm & 0x40) >> 3; // bit 6 -> bit 3
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate for operand 0 and operand 1 trading places.
  // (With the numbering used by the sibling transforms, exchanging bits 2/4
  // and 3/5 corresponds to operands 0 and 1, not operands 1 and 2.)
  // Immediate bits 2 <-> 4 and 3 <-> 5 are exchanged; bits 0, 1, 6 and 7
  // (mask 0xc3) are left where they are.
  const uint8_t OldImm = N->getZExtValue();
  uint8_t Swizzled = OldImm & 0xc3;
  Swizzled |= (OldImm & 0x04) << 2; // bit 2 -> bit 4
  Swizzled |= (OldImm & 0x10) >> 2; // bit 4 -> bit 2
  Swizzled |= (OldImm & 0x08) << 2; // bit 3 -> bit 5
  Swizzled |= (OldImm & 0x20) >> 2; // bit 5 -> bit 3
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate for operand 1 and operand 2 trading places.
  // Immediate bits 1 <-> 2 and 5 <-> 6 are exchanged; bits 0, 3, 4 and 7
  // (mask 0x99) are left where they are.
  const uint8_t OldImm = N->getZExtValue();
  uint8_t Swizzled = OldImm & 0x99;
  Swizzled |= (OldImm & 0x02) << 1; // bit 1 -> bit 2
  Swizzled |= (OldImm & 0x04) >> 1; // bit 2 -> bit 1
  Swizzled |= (OldImm & 0x20) << 1; // bit 5 -> bit 6
  Swizzled |= (OldImm & 0x40) >> 1; // bit 6 -> bit 5
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate with a cyclic move of its bits:
  //   1->2, 2->4, 4->1 and 3->6, 6->5, 5->3; bits 0 and 7 (mask 0x81) stay.
  // This is a 3-cycle, so unlike the pairwise swaps it is not its own
  // inverse; applying the opposite moves (1->4, 2->1, 4->2, ...) undoes it.
  const uint8_t OldImm = N->getZExtValue();
  uint8_t Swizzled = OldImm & 0x81;
  Swizzled |= (OldImm & 0x02) << 1; // bit 1 -> bit 2
  Swizzled |= (OldImm & 0x04) << 2; // bit 2 -> bit 4
  Swizzled |= (OldImm & 0x08) << 3; // bit 3 -> bit 6
  Swizzled |= (OldImm & 0x10) >> 3; // bit 4 -> bit 1
  Swizzled |= (OldImm & 0x20) >> 2; // bit 5 -> bit 3
  Swizzled |= (OldImm & 0x40) >> 1; // bit 6 -> bit 5
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Rewrite a VPTERNLOG immediate with a cyclic move of its bits:
  //   1->4, 4->2, 2->1 and 3->5, 5->6, 6->3; bits 0 and 7 (mask 0x81) stay.
  // This is a 3-cycle, so unlike the pairwise swaps it is not its own
  // inverse; applying the opposite moves (1->2, 2->4, 4->1, ...) undoes it.
  const uint8_t OldImm = N->getZExtValue();
  uint8_t Swizzled = OldImm & 0x81;
  Swizzled |= (OldImm & 0x02) << 3; // bit 1 -> bit 4
  Swizzled |= (OldImm & 0x04) >> 1; // bit 2 -> bit 1
  Swizzled |= (OldImm & 0x08) << 2; // bit 3 -> bit 5
  Swizzled |= (OldImm & 0x10) >> 2; // bit 4 -> bit 2
  Swizzled |= (OldImm & 0x20) << 1; // bit 5 -> bit 6
  Swizzled |= (OldImm & 0x40) >> 3; // bit 6 -> bit 3
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
11085
// VPTERNLOG for one vector width.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - ternary-logic node taking three vector operands and an i8
//               target immediate.
//   sched     - scheduling class; memory forms use its folded variant.
//   _         - vector type info for this width.
//   Name      - base instruction name, used to look up the generated
//               instructions (Name # _.ZSuffix # form) in the extra patterns.
//
// Defines the register (rri), full-vector load (rmi) and broadcast-load
// (rmbi) forms with $src1 tied to $dst, followed by patterns that accept the
// operands in other orders by permuting the immediate.
//
// Immediate permutation: the instruction evaluates
//   Imm[(src1 << 2) | (src2 << 1) | src3]
// per bit, so when the node's operands appear in a different order the
// immediate bits must be moved to the index the instruction will compute.
// For pairwise swaps the permutation is its own inverse.  For the two cyclic
// orders it is not: a node ordered (src2, src3, src1) needs table entry 1
// (only src1 set) to land on entry 4, which is what VPTERNLOG312_imm8 does,
// while a node ordered (src3, src1, src2) needs entry 1 (only src2 set) to
// land on entry 2, which is what VPTERNLOG231_imm8 does.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 timm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (_.BroadcastLdFrag addr:$src3)),
                            (i8 timm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  // The passthru ($src1) must end up as the tied first operand, so the
  // immediate is permuted to account for where $src1 sat in the node.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching loads in other positions.
  // The load can only be folded as the last instruction operand.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Node order (src2, mem, src1): entry 1 (src1 only) must move to entry 4,
  // i.e. the 1->4, 2->1, 4->2 rotation implemented by VPTERNLOG312_imm8.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // Node order (mem, src1, src2): entry 1 (src2 only) must move to entry 2,
  // i.e. the 1->2, 2->4, 4->1 rotation implemented by VPTERNLOG231_imm8.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (_.BroadcastLdFrag addr:$src3),
                          _.RC:$src2, (i8 timm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Node order (src2, bcst, src1): needs the 1->4, 2->1, 4->2 rotation.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // Node order (bcst, src1, src2): needs the 1->2, 2->4, 4->1 rotation.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
}
11253
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for each
// vector length: 512-bit under HasAVX512, 128/256-bit additionally under
// HasVLX.  NAME is forwarded so the extra patterns can find the instructions.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                            _.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>,
                EVEX_V256;
  }
}
11266
// Dword- and qword-element ternary logic.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>,
                  VEX_W;
11271
// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
// 128- and 256-bit byte/word vectors are selected onto the qword-element
// instruction.  For each type there are four patterns: all-register, load in
// the last position, load in the first position (immediate permuted with
// VPTERNLOG321_imm8) and load in the middle position (VPTERNLOG132_imm8).
let Predicates = [HasVLX] in {
  // v16i8
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                    (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
             timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
                    (loadv16i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
             timm:$src4)>;
  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
                    VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
                    VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // v8i16
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                    (i8 timm:$src4))),
            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
             timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
                    (loadv8i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
             timm:$src4)>;
  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
                    VR128X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
                    VR128X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // v32i8
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                    (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
             timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
                    (loadv32i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
             timm:$src4)>;
  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
                    VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
                    VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // v16i16
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                     (i8 timm:$src4))),
            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
             timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
                     (loadv16i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
             timm:$src4)>;
  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
                     VR256X:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
                     VR256X:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;
}
11342
// 512-bit byte/word vectors selected onto the qword-element VPTERNLOG.
// Same four shapes per type as the narrower widths: all-register, load last,
// load first (VPTERNLOG321_imm8) and load in the middle (VPTERNLOG132_imm8).
let Predicates = [HasAVX512] in {
  // v64i8
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                    (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
             timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
                    (loadv64i8 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
             timm:$src4)>;
  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
                    VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
                    VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // v32i16
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                     (i8 timm:$src4))),
            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
             timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
                     (loadv32i16 addr:$src3), (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
             timm:$src4)>;
  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
                     VR512:$src1, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
                     VR512:$src2, (i8 timm:$src4))),
            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;
}
11378
11379// Patterns to implement vnot using vpternlog instead of creating all ones
11380// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11381// so that the result is only dependent on src0. But we use the same source
11382// for all operands to prevent a false dependency.
11383// TODO: We should maybe have a more generalized algorithm for folding to
11384// vpternlog.
// 512-bit vector NOT: (xor x, all-ones) for every integer element type is
// selected as VPTERNLOGQZrri with x in all three source slots and imm 15.
11385let Predicates = [HasAVX512] in {
11386  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11387            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11388  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11389            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11390  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11391            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11392  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11393            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11394}
11395
// 128/256-bit vector NOT when AVX512 is available but VLX is not. The source
// is placed in the low subregister of an otherwise undefined 512-bit value
// (INSERT_SUBREG into IMPLICIT_DEF), the 512-bit VPTERNLOGQZrri with imm 15 is
// applied, and the low sub_xmm / sub_ymm part of the result is extracted.
11396let Predicates = [HasAVX512, NoVLX] in {
  // 128-bit element types (sub_xmm).
11397  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11398            (EXTRACT_SUBREG
11399             (VPTERNLOGQZrri
11400              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11401              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11402              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11403              (i8 15)), sub_xmm)>;
11404  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11405            (EXTRACT_SUBREG
11406             (VPTERNLOGQZrri
11407              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11408              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11409              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11410              (i8 15)), sub_xmm)>;
11411  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11412            (EXTRACT_SUBREG
11413             (VPTERNLOGQZrri
11414              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11415              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11416              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11417              (i8 15)), sub_xmm)>;
11418  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11419            (EXTRACT_SUBREG
11420             (VPTERNLOGQZrri
11421              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11422              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11423              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11424              (i8 15)), sub_xmm)>;
11425
  // 256-bit element types (sub_ymm).
11426  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11427            (EXTRACT_SUBREG
11428             (VPTERNLOGQZrri
11429              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11430              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11431              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11432              (i8 15)), sub_ymm)>;
11433  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11434            (EXTRACT_SUBREG
11435             (VPTERNLOGQZrri
11436              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11437              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11438              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11439              (i8 15)), sub_ymm)>;
11440  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11441            (EXTRACT_SUBREG
11442             (VPTERNLOGQZrri
11443              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11444              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11445              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11446              (i8 15)), sub_ymm)>;
11447  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11448            (EXTRACT_SUBREG
11449             (VPTERNLOGQZrri
11450              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11451              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11452              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11453              (i8 15)), sub_ymm)>;
11454}
11455
// 128/256-bit vector NOT when VLX is available: (xor x, all-ones) is selected
// directly as VPTERNLOGQZ128rri / VPTERNLOGQZ256rri with x in all three source
// slots and imm 15, for every integer element type.
11456let Predicates = [HasVLX] in {
11457  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11458            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11459  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11460            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11461  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11462            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11463  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11464            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11465
11466  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11467            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11468  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11469            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11470  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11471            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11472  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11473            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11474}
11475
11476//===----------------------------------------------------------------------===//
11477// AVX-512 - FixupImm
11478//===----------------------------------------------------------------------===//
11479
// Packed VFIXUPIMM forms built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info for $dst/$src1/$src2.
//   TblVT     - vector type info used to type $src3 in the selection pattern.
// Defines rri (register), rmi (full-vector load) and rmbi (broadcast load,
// EVEX_B). $src1 is tied to $dst, and every form reads MXCSR and is marked
// mayRaiseFPException.
11480multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11481                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11482                                  X86VectorVTInfo TblVT>{
11483  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11484      Uses = [MXCSR], mayRaiseFPException = 1 in {
11485    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11486                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11487                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11488                        (X86VFixupimm (_.VT _.RC:$src1),
11489                                      (_.VT _.RC:$src2),
11490                                      (TblVT.VT _.RC:$src3),
11491                                      (i32 timm:$src4))>, Sched<[sched]>;
11492    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11493                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11494                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11495                      (X86VFixupimm (_.VT _.RC:$src1),
11496                                    (_.VT _.RC:$src2),
11497                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11498                                    (i32 timm:$src4))>,
11499                      Sched<[sched.Folded, sched.ReadAfterFold]>;
11500    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11501                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11502                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11503                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
11504                      (X86VFixupimm (_.VT _.RC:$src1),
11505                                    (_.VT _.RC:$src2),
11506                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11507                                    (i32 timm:$src4))>,
11508                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11509  } // Constraints = "$src1 = $dst"
11510}
11511
// Extends avx512_fixupimm_packed (same parameters) with rrib, the
// register-register {sae} form. It matches X86VFixupimmSAE and sets EVEX_B.
// Unlike the inherited forms, the enclosing let lists Uses = [MXCSR] but does
// not set mayRaiseFPException.
11512multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11513                                      X86FoldableSchedWrite sched,
11514                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
11515  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11516let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11517  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11518                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11519                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11520                      "$src2, $src3, {sae}, $src4",
11521                      (X86VFixupimmSAE (_.VT _.RC:$src1),
11522                                       (_.VT _.RC:$src2),
11523                                       (TblVT.VT _.RC:$src3),
11524                                       (i32 timm:$src4))>,
11525                      EVEX_B, Sched<[sched]>;
11526  }
11527}
11528
// Scalar VFIXUPIMM forms built on AVX512_maskable_3src_scalar.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class; only the memory form (rmi) uses
//               sched.Folded and sched.ReadAfterFold.
//   _         - type info for $dst/$src1/$src2.
//   _src3VT   - type info used to type $src3 in the selection pattern.
// Defines rri (register), rrib (register with {sae}, EVEX_B) and rmi (scalar
// load). $src1 is tied to $dst. rri and rmi carry SIMD_EXC; rrib only lists
// Uses = [MXCSR].
11529multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11530                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11531                                  X86VectorVTInfo _src3VT> {
11532  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11533      ExeDomain = _.ExeDomain in {
11534    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11535                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11536                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11537                      (X86VFixupimms (_.VT _.RC:$src1),
11538                                     (_.VT _.RC:$src2),
11539                                     (_src3VT.VT _src3VT.RC:$src3),
11540                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // The {sae} form has only register operands (MRMSrcReg), so it uses the
    // plain register scheduling class, like rri above, not the folded-load one.
11541    let Uses = [MXCSR] in
11542    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11543                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11544                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11545                      "$src2, $src3, {sae}, $src4",
11546                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
11547                                        (_.VT _.RC:$src2),
11548                                        (_src3VT.VT _src3VT.RC:$src3),
11549                                        (i32 timm:$src4))>,
11550                      EVEX_B, Sched<[sched]>;
11551    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11552                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11553                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11554                     (X86VFixupimms (_.VT _.RC:$src1),
11555                                    (_.VT _.RC:$src2),
11556                                    (_src3VT.VT (scalar_to_vector
11557                                              (_src3VT.ScalarLdFrag addr:$src3))),
11558                                    (i32 timm:$src4))>,
11559                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11560  }
11561}
11562
// Instantiates packed VFIXUPIMM (opcode 0x54) at all three vector widths.
//   sched - per-width scheduling classes (.ZMM/.YMM/.XMM).
//   _Vec  - per-width type info for the data operands.
//   _Tbl  - per-width type info for the $src3 operand.
// Only the 512-bit variant uses the _sae multiclass and so gets the {sae}
// form; the 128/256-bit variants additionally require HasVLX.
11563multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11564                                      AVX512VLVectorVTInfo _Vec,
11565                                      AVX512VLVectorVTInfo _Tbl> {
11566  let Predicates = [HasAVX512] in
11567    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11568                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11569                                EVEX_4V, EVEX_V512;
11570  let Predicates = [HasAVX512, HasVLX] in {
11571    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11572                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11573                            EVEX_4V, EVEX_V128;
11574    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11575                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11576                            EVEX_4V, EVEX_V256;
11577  }
11578}
11579
// VFIXUPIMM instantiations. The scalar forms use opcode 0x55 and type $src3
// with the integer vector info of matching element width (v4i32x_info for SS,
// v2i64x_info for SD). The packed forms pair each FP info with the integer
// info of the same element width. The 64-bit variants add VEX_W.
11580defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11581                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11582                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11583defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11584                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11585                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11586defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11587                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11588defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11589                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11590
11591// Patterns used to select SSE scalar fp arithmetic instructions from
11592// either:
11593//
11594// (1) a scalar fp operation followed by a blend
11595//
11596// The effect is that the backend no longer emits unnecessary vector
11597// insert instructions immediately after SSE scalar fp instructions
11598// like addss or mulss.
11599//
11600// For example, given the following code:
11601//   __m128 foo(__m128 A, __m128 B) {
11602//     A[0] += B[0];
11603//     return A;
11604//   }
11605//
11606// Previously we generated:
11607//   addss %xmm0, %xmm1
11608//   movss %xmm1, %xmm0
11609//
11610// We now generate:
11611//   addss %xmm1, %xmm0
11612//
11613// (2) a vector packed single/double fp operation followed by a vector insert
11614//
11615// The effect is that the backend converts the packed fp instruction
11616// followed by a vector insert into a single SSE scalar fp instruction.
11617//
11618// For example, given the following code:
11619//   __m128 foo(__m128 A, __m128 B) {
11620//     __m128 C = A + B;
11621//     return (__m128) {C[0], A[1], A[2], A[3]};
11622//   }
11623//
11624// Previously we generated:
11625//   addps %xmm0, %xmm1
11626//   movss %xmm1, %xmm0
11627//
11628// We now generate:
11629//   addss %xmm1, %xmm0
11630
11631// TODO: Some canonicalization in lowering would simplify the number of
11632// patterns we have to try to match.
// Selection patterns that fold "extract element 0, apply Op, re-insert with
// MoveNode" into the _Int forms of the scalar AVX512 instruction.
//   Op        - scalar FP operation node.
//   OpcPrefix - instruction stem; "V" is prepended and Zrr_Int / Zrm_Int
//               (plus the k / kz variants) appended to find the instruction.
//   MoveNode  - node that inserts the scalar result back into the vector.
//   _         - 128-bit vector type info.
//   ZeroFP    - PatLeaf matched as the false value of the zero-masked select.
// Each shape has a register and a scalar-load variant.
11633multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11634                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
11635  let Predicates = [HasAVX512] in {
11636    // extracted scalar math op with insert via movss
11637    def : Pat<(MoveNode
11638               (_.VT VR128X:$dst),
11639               (_.VT (scalar_to_vector
11640                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11641                          _.FRC:$src)))),
11642              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11643               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11644    def : Pat<(MoveNode
11645               (_.VT VR128X:$dst),
11646               (_.VT (scalar_to_vector
11647                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11648                          (_.ScalarLdFrag addr:$src))))),
11649              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11650
11651    // extracted merge-masked scalar math op (passthru $src0) with insert via movss
11652    def : Pat<(MoveNode (_.VT VR128X:$src1),
11653               (scalar_to_vector
11654                (X86selects VK1WM:$mask,
11655                            (Op (_.EltVT
11656                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11657                                _.FRC:$src2),
11658                            _.FRC:$src0))),
11659              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11660               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11661               VK1WM:$mask, _.VT:$src1,
11662               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11663    def : Pat<(MoveNode (_.VT VR128X:$src1),
11664               (scalar_to_vector
11665                (X86selects VK1WM:$mask,
11666                            (Op (_.EltVT
11667                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11668                                (_.ScalarLdFrag addr:$src2)),
11669                            _.FRC:$src0))),
11670              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11671               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11672               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11673
11674    // extracted zero-masked scalar math op (false value ZeroFP) with insert via movss
11675    def : Pat<(MoveNode (_.VT VR128X:$src1),
11676               (scalar_to_vector
11677                (X86selects VK1WM:$mask,
11678                            (Op (_.EltVT
11679                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11680                                _.FRC:$src2), (_.EltVT ZeroFP)))),
11681      (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11682          VK1WM:$mask, _.VT:$src1,
11683          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11684    def : Pat<(MoveNode (_.VT VR128X:$src1),
11685               (scalar_to_vector
11686                (X86selects VK1WM:$mask,
11687                            (Op (_.EltVT
11688                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11689                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11690      (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11691  }
11692}
11693
// Instantiate the scalar math patterns for fadd/fsub/fmul/fdiv: the SS forms
// use X86Movss with v4f32x_info, the SD forms use X86Movsd with v2f64x_info.
11694defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11695defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11696defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11697defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11698
11699defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11700defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11701defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11702defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11703
// Selection pattern for a unary scalar op applied to element 0 of $src and
// inserted into $dst with Move; it selects "V" # OpcPrefix # "Zr_Int" with
// $dst and $src as operands.
//   OpNode    - unary scalar operation node.
//   OpcPrefix - instruction stem.
//   Move      - node that inserts the scalar result into $dst.
//   _         - vector type info.
11704multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11705                                             SDNode Move, X86VectorVTInfo _> {
11706  let Predicates = [HasAVX512] in {
11707    def : Pat<(_.VT (Move _.VT:$dst,
11708                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11709              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11710  }
11711}
11712
// Instantiate the unary pattern for fsqrt on single and double precision.
11713defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11714defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11715
11716//===----------------------------------------------------------------------===//
11717// AES instructions
11718//===----------------------------------------------------------------------===//
11719
// EVEX-encoded AES round instructions at 128/256/512 bits, built on
// AESI_binop_rm_int.
//   Op        - opcode byte.
//   OpStr     - mnemonic.
//   IntPrefix - name of the 128-bit intrinsic; "_256" and "_512" are appended
//               to look up the wider intrinsics.
// The 128/256-bit forms require HasVLX and HasVAES; the 512-bit form requires
// HasAVX512 and HasVAES.
11720multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11721  let Predicates = [HasVLX, HasVAES] in {
11722    defm Z128 : AESI_binop_rm_int<Op, OpStr,
11723                                  !cast<Intrinsic>(IntPrefix),
11724                                  loadv2i64, 0, VR128X, i128mem>,
11725                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11726    defm Z256 : AESI_binop_rm_int<Op, OpStr,
11727                                  !cast<Intrinsic>(IntPrefix##"_256"),
11728                                  loadv4i64, 0, VR256X, i256mem>,
11729                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11730    }
11731    let Predicates = [HasAVX512, HasVAES] in
11732    defm Z    : AESI_binop_rm_int<Op, OpStr,
11733                                  !cast<Intrinsic>(IntPrefix##"_512"),
11734                                  loadv8i64, 0, VR512, i512mem>,
11735                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
11736}
11737
// The four AES round instructions, opcodes 0xDC-0xDF, each bound to its
// int_x86_aesni_* intrinsic family.
11738defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11739defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11740defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11741defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11742
11743//===----------------------------------------------------------------------===//
11744// PCLMUL instructions - Carry less multiplication
11745//===----------------------------------------------------------------------===//
11746
// EVEX-encoded VPCLMULQDQ. The 512-bit form requires HasAVX512 and
// HasVPCLMULQDQ; the 128/256-bit forms require HasVLX and HasVPCLMULQDQ.
11747let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11748defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11749                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11750
11751let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11752defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11753                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11754
11755defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11756                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11757                                EVEX_CD8<64, CD8VF>, VEX_WIG;
11758}
11759
11760// Aliases
// One alias set per width, keyed by the instruction name prefix defined above.
11761defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11762defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11763defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11764
11765//===----------------------------------------------------------------------===//
11766// VBMI2
11767//===----------------------------------------------------------------------===//
11768
// Three-source VBMI2 variable shift: register (r) and full-vector load (m)
// forms built on AVX512_maskable_3src, with $src1 tied to $dst.
//   Op     - opcode byte.
//   OpStr  - mnemonic.
//   OpNode - selection node taking ($src1, $src2, $src3).
//   sched  - scheduling class; the m form uses sched.Folded/ReadAfterFold.
//   VTI    - vector type info.
11769multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11770                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11771  let Constraints = "$src1 = $dst",
11772      ExeDomain   = VTI.ExeDomain in {
11773    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11774                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11775                "$src3, $src2", "$src2, $src3",
11776                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11777                AVX512FMA3Base, Sched<[sched]>;
11778    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11779                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11780                "$src3, $src2", "$src2, $src3",
11781                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11782                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
11783                AVX512FMA3Base,
11784                Sched<[sched.Folded, sched.ReadAfterFold]>;
11785  }
11786}
11787
// Extends VBMI2_shift_var_rm (same parameters) with mb, the broadcast-load
// form: $src3 is a scalar memory operand matched through
// VTI.BroadcastLdFrag, and EVEX_B is set.
11788multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11789                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11790         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11791  let Constraints = "$src1 = $dst",
11792      ExeDomain   = VTI.ExeDomain in
11793  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11794              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11795              "${src3}"##VTI.BroadcastStr##", $src2",
11796              "$src2, ${src3}"##VTI.BroadcastStr,
11797              (OpNode VTI.RC:$src1, VTI.RC:$src2,
11798               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11799              AVX512FMA3Base, EVEX_B,
11800              Sched<[sched.Folded, sched.ReadAfterFold]>;
11801}
11802
// Instantiates VBMI2_shift_var_rm (no broadcast form) at 512/256/128 bits.
// The 512-bit variant requires HasVBMI2; the narrower ones also need HasVLX.
11803multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11804                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11805  let Predicates = [HasVBMI2] in
11806  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11807                                   EVEX_V512;
11808  let Predicates = [HasVBMI2, HasVLX] in {
11809    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11810                                   EVEX_V256;
11811    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11812                                   EVEX_V128;
11813  }
11814}
11815
// Instantiates VBMI2_shift_var_rmb (including the broadcast form) at
// 512/256/128 bits. The 512-bit variant requires HasVBMI2; the narrower ones
// also need HasVLX.
11816multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11817                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11818  let Predicates = [HasVBMI2] in
11819  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11820                                    EVEX_V512;
11821  let Predicates = [HasVBMI2, HasVLX] in {
11822    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11823                                    EVEX_V256;
11824    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11825                                    EVEX_V128;
11826  }
11827}
// Word/dword/qword variants of a VBMI2 variable shift.
//   wOp    - opcode for the word form.
//   dqOp   - opcode shared by the dword and qword forms.
//   Prefix - mnemonic stem; "w"/"d"/"q" is appended.
// The word form is built without a broadcast variant (rm_common); the
// dword/qword forms include it (rmb_common). W and Q add VEX_W.
11828multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11829                           SDNode OpNode, X86SchedWriteWidths sched> {
11830  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11831             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11832  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11833             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11834  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11835             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
11836}
11837
// Word/dword/qword variants of a VBMI2 immediate-count shift.
//   wOp    - opcode for the word form.
//   dqOp   - opcode shared by the dword and qword forms.
//   Prefix - mnemonic stem; "w"/"d"/"q" is appended.
// The word form goes through avx512_common_3Op_rm_imm8, the dword/qword forms
// through avx512_common_3Op_imm8; all are gated on HasVBMI2.
// NOTE(review): the two helper multiclasses take their arguments in different
// orders and are defined outside this chunk - check both before editing.
11838multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11839                           SDNode OpNode, X86SchedWriteWidths sched> {
11840  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11841             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11842             VEX_W, EVEX_CD8<16, CD8VF>;
11843  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11844             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11845  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11846             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11847}
11848
11849// Concat & Shift
// The variable-count (V suffix) and immediate-count forms use the same opcode
// pairs: 0x70/0x71 for the left shifts and 0x72/0x73 for the right shifts.
11850defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11851defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11852defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11853defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
11854
11855// Compress
// Byte and word compress (opcode 0x63), gated on HasVBMI2 and marked
// NotMemoryFoldable; the word form adds VEX_W.
11856defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11857                                         avx512vl_i8_info, HasVBMI2>, EVEX,
11858                                         NotMemoryFoldable;
11859defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11860                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11861                                          NotMemoryFoldable;
11862// Expand
// Byte and word expand (opcode 0x62), gated on HasVBMI2; the word form adds
// VEX_W.
11863defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11864                                      avx512vl_i8_info, HasVBMI2>, EVEX;
11865defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11866                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11867
11868//===----------------------------------------------------------------------===//
11869// VNNI
11870//===----------------------------------------------------------------------===//
11871
// VNNI three-source instruction: register (r), full-vector load (m) and
// broadcast-load (mb, EVEX_B) forms built on AVX512_maskable_3src, with $src1
// tied to $dst by the enclosing let.
//   Op           - opcode byte.
//   OpStr        - mnemonic.
//   OpNode       - selection node taking ($src1, $src2, $src3).
//   sched        - scheduling class; memory forms use Folded/ReadAfterFold.
//   VTI          - vector type info.
//   IsCommutable - passed twice to the register form only; the memory forms
//                  leave those template arguments at their defaults.
11872let Constraints = "$src1 = $dst" in
11873multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11874                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11875                    bit IsCommutable> {
11876  let ExeDomain = VTI.ExeDomain in {
11877  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11878                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11879                                   "$src3, $src2", "$src2, $src3",
11880                                   (VTI.VT (OpNode VTI.RC:$src1,
11881                                            VTI.RC:$src2, VTI.RC:$src3)),
11882                                   IsCommutable, IsCommutable>,
11883                                   EVEX_4V, T8PD, Sched<[sched]>;
11884  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11885                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11886                                   "$src3, $src2", "$src2, $src3",
11887                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11888                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
11889                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11890                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11891  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11892                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11893                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11894                                   "$src2, ${src3}"##VTI.BroadcastStr,
11895                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
11896                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11897                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11898                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
11899  }
11900}
11901
// Instantiates VNNI_rmb on 32-bit integer element vectors at 512/256/128 bits.
// The 512-bit variant requires HasVNNI; the narrower ones also need HasVLX.
11902multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11903                       X86SchedWriteWidths sched, bit IsCommutable> {
11904  let Predicates = [HasVNNI] in
11905  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
11906                           IsCommutable>, EVEX_V512;
11907  let Predicates = [HasVNNI, HasVLX] in {
11908    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
11909                           IsCommutable>, EVEX_V256;
11910    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
11911                           IsCommutable>, EVEX_V128;
11912  }
11913}
11914
11915// FIXME: Is there a better scheduler class for VPDP?
// The last argument is IsCommutable: 0 for the BUSD forms, 1 for the WSSD
// forms.
11916defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
11917defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
11918defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
11919defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
11920
// X86vpmaddwd restricted to nodes with a single use (N->hasOneUse()).
11921def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
11922                             (X86vpmaddwd node:$lhs, node:$rhs), [{
11923  return N->hasOneUse();
11924}]>;
11925
11926// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// An add whose second operand is a single-use vpmaddwd (X86vpmaddwd_su) is
// selected as one VPDPWSSD, with the other add operand as the accumulator
// $src1. Each width has a register and a load variant; the 128/256-bit
// patterns also require HasVLX.
11927let Predicates = [HasVNNI] in {
11928  def : Pat<(v16i32 (add VR512:$src1,
11929                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
11930            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
11931  def : Pat<(v16i32 (add VR512:$src1,
11932                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
11933            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
11934}
11935let Predicates = [HasVNNI,HasVLX] in {
11936  def : Pat<(v8i32 (add VR256X:$src1,
11937                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
11938            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
11939  def : Pat<(v8i32 (add VR256X:$src1,
11940                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
11941            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
11942  def : Pat<(v4i32 (add VR128X:$src1,
11943                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
11944            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
11945  def : Pat<(v4i32 (add VR128X:$src1,
11946                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
11947            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
11948}
11949
11950//===----------------------------------------------------------------------===//
11951// Bit Algorithms
11952//===----------------------------------------------------------------------===//
11953
// Byte and word population count, both selected from the ctpop node under
// HasBITALG. The two definitions share opcode 0x54; VPOPCNTW uses the i16
// vector info and additionally carries VEX_W.
11954// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
11955defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11956                                   avx512vl_i8_info, HasBITALG>;
11957defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11958                                   avx512vl_i16_info, HasBITALG>, VEX_W;
11959
// Anonymous instantiations of avx512_unary_lowering for the VPOPCNTB and
// VPOPCNTW instruction name prefixes, keyed on ctpop and HasBITALG.
// NOTE(review): avx512_unary_lowering is defined outside this section;
// presumably it adds extra selection patterns that reuse the named
// instructions - confirm at its definition.
11960defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11961defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11962
// Single-use variant of X86Vpshufbitqmb: the fragment only matches when the
// matched node has exactly one use (N->hasOneUse()).
11963def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
11964                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
11965  return N->hasOneUse();
11966}]>;
11967
// Defines the register-register (rr) and register-memory (rm) forms of
// vpshufbitqmb for one vector type VTI. Both use opcode 0x8F, write a mask
// register of class VTI.KRC, and are built with AVX512_maskable_cmp.
// Two DAG patterns are supplied per form: the plain X86Vpshufbitqmb node and
// the single-use X86Vpshufbitqmb_su node.
// NOTE(review): presumably AVX512_maskable_cmp uses the second (_su) pattern
// for the write-masked variant - confirm at its definition.
11968multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11969  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11970                                (ins VTI.RC:$src1, VTI.RC:$src2),
11971                                "vpshufbitqmb",
11972                                "$src2, $src1", "$src1, $src2",
11973                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11974                                (VTI.VT VTI.RC:$src2)),
11975                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11976                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11977                                Sched<[sched]>;
  // Memory form: $src2 is loaded through VTI.LdFrag; uses the folded-load
  // scheduling entries and EVEX_CD8<8, CD8VF>.
11978  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11979                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
11980                                "vpshufbitqmb",
11981                                "$src2, $src1", "$src1, $src2",
11982                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11983                                (VTI.VT (VTI.LdFrag addr:$src2))),
11984                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11985                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
11986                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11987                                Sched<[sched.Folded, sched.ReadAfterFold]>;
11988}
11989
// Instantiates VPSHUFBITQMB_rm at the three widths described by VTI:
//   Z    - VTI.info512, EVEX_V512, requires HasBITALG.
//   Z256 - VTI.info256, EVEX_V256, requires HasBITALG and HasVLX.
//   Z128 - VTI.info128, EVEX_V128, requires HasBITALG and HasVLX.
11990multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11991  let Predicates = [HasBITALG] in
11992  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11993  let Predicates = [HasBITALG, HasVLX] in {
11994    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11995    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
11996  }
11997}
11998
// VPSHUFBITQMB over i8 element vectors (avx512vl_i8_info), scheduled with
// SchedWriteVecIMul.
11999// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12000defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12001
12002//===----------------------------------------------------------------------===//
12003// GFNI
12004//===----------------------------------------------------------------------===//
12005
// Instantiates avx512_binop_rm for a GFNI byte-wise binary operation at three
// widths:
//   Z    - v64i8_info,  EVEX_V512, requires HasGFNI, HasAVX512 and HasBWI.
//   Z256 - v32i8x_info, EVEX_V256, requires HasGFNI, HasVLX and HasBWI.
//   Z128 - v16i8x_info, EVEX_V128, requires HasGFNI, HasVLX and HasBWI.
// NOTE(review): the trailing literal 1 passed to avx512_binop_rm is presumably
// its IsCommutable flag - confirm at the avx512_binop_rm definition.
12006multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12007                                   X86SchedWriteWidths sched> {
12008  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12009  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12010                                EVEX_V512;
12011  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12012    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12013                                EVEX_V256;
12014    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12015                                EVEX_V128;
12016  }
12017}
12018
// EVEX-encoded vgf2p8mulb: opcode 0xCF, X86GF2P8mulb node, SchedWriteVecALU,
// with EVEX_CD8<8, CD8VF> and the T8PD opcode map/prefix applied to all widths.
12019defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12020                                          SchedWriteVecALU>,
12021                                          EVEX_CD8<8, CD8VF>, T8PD;
12022
// Extends avx512_3Op_rm_imm8 (instantiated with VTI as both of its vector-info
// arguments) with an extra broadcast-memory form, rmbi.
//   VTI     - vector info used for the register operands and the result.
//   BcstVTI - vector info describing the broadcast: its BroadcastStr is
//             appended to the memory operand in the asm strings, and its VT is
//             the type of the broadcast-load node before the bitconvert.
// The broadcast node is hard-coded to X86VBroadcastld64, so BcstVTI is
// expected to be a 64-bit-element info (as in the instantiations in
// GF2P8AFFINE_avx512_common).
12023multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12024                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12025                                      X86VectorVTInfo BcstVTI>
12026           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12027  let ExeDomain = VTI.ExeDomain in
12028  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12029                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12030                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
12031                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
12032                (OpNode (VTI.VT VTI.RC:$src1),
12033                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12034                 (i8 timm:$src3))>, EVEX_B,
12035                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12036}
12037
// Instantiates GF2P8AFFINE_avx512_rmb_imm at three widths, pairing each i8
// vector info with the i64 vector info of the same total width for the
// broadcast form:
//   Z    - v64i8_info  / v8i64_info,  EVEX_V512, HasGFNI + HasAVX512 + HasBWI.
//   Z256 - v32i8x_info / v4i64x_info, EVEX_V256, HasGFNI + HasVLX + HasBWI.
//   Z128 - v16i8x_info / v2i64x_info, EVEX_V128, HasGFNI + HasVLX + HasBWI.
12038multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12039                                     X86SchedWriteWidths sched> {
12040  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12041  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12042                                           v64i8_info, v8i64_info>, EVEX_V512;
12043  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12044    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12045                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12046    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12047                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12048  }
12049}
12050
// EVEX-encoded GF(2^8) affine instructions with an 8-bit immediate.
// VGF2P8AFFINEINVQB uses opcode 0xCF and VGF2P8AFFINEQB uses opcode 0xCE; both
// share SchedWriteVecIMul and the same encoding modifiers (EVEX_4V,
// EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base).
12051defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12052                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12053                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12054defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12055                         X86GF2P8affineqb, SchedWriteVecIMul>,
12056                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12057
12058
12059//===----------------------------------------------------------------------===//
12060// AVX5124FMAPS
12061//===----------------------------------------------------------------------===//
12062
// AVX5124FMAPS memory-form instructions. Shared properties set by the
// enclosing let: no unmodeled side effects, may load, single-precision packed
// domain, $src1 tied to $dst, reads MXCSR and may raise FP exceptions.
// Every definition takes a register $src2 and an f128mem $src3 and has an
// empty pattern list, so no selection pattern is attached here.
//   V4FMADDPS / V4FNMADDPS - v16f32_info, VR512, EVEX_V512, CD8VQ tuple,
//                            SchedWriteFMA.ZMM.Folded.
//   V4FMADDSS / V4FNMADDSS - f32x_info, VR128X, VEX_LIG, CD8VF tuple,
//                            SchedWriteFMA.Scl.Folded.
12063let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12064    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12065defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12066                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12067                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12068                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12069                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12070
12071defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12072                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12073                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12074                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12075                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12076
12077defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12078                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12079                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12080                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12081                    Sched<[SchedWriteFMA.Scl.Folded]>;
12082
12083defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12084                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12085                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12086                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12087                     Sched<[SchedWriteFMA.Scl.Folded]>;
12088}
12089
12090//===----------------------------------------------------------------------===//
12091// AVX5124VNNIW
12092//===----------------------------------------------------------------------===//
12093
// AVX5124VNNIW memory-form instructions. Shared properties set by the
// enclosing let: no unmodeled side effects, may load, packed-integer domain,
// $src1 tied to $dst. Both definitions are 512-bit (v16i32_info, VR512,
// EVEX_V512), take a register $src2 and an f128mem $src3, and have an empty
// pattern list, so no selection pattern is attached here.
// NOTE(review): these integer instructions are scheduled with
// SchedWriteFMA.ZMM.Folded and use the f128mem operand type, mirroring the
// AVX5124FMAPS definitions - confirm this is intentional.
12094let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12095    Constraints = "$src1 = $dst" in {
12096defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12097                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12098                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12099                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12100                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12101
12102defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12103                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12104                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12105                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12106                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12107}
12108
// Pseudo instructions that move a VK16PAIR mask-register pair to and from
// memory. Neither has a selection pattern (empty pattern list).
//   MASKPAIR16STORE - stores $src to $dst; mayStore, SchedRW = WriteFStoreX.
//   MASKPAIR16LOAD  - loads $dst from $src; mayLoad, SchedRW = WriteFLoadX.
// NOTE(review): presumably these are expanded into real instructions elsewhere
// in the backend - the expansion is not visible in this file section.
12109let hasSideEffects = 0 in {
12110  let mayStore = 1, SchedRW = [WriteFStoreX] in
12111  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12112  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12113  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12114}
12115
12116//===----------------------------------------------------------------------===//
12117// VP2INTERSECT
12118//===----------------------------------------------------------------------===//
12119
// Defines the three operand forms of vp2intersect for one vector type `_`.
// All forms use opcode 0x68, write a mask-register pair of class _.KRPC, and
// select from the X86vp2intersect node. The mnemonic is "vp2intersect"
// followed by _.Suffix.
//   rr  - both sources in registers.
//   rm  - $src2 loaded from memory through _.LdFrag (via bitconvert).
//   rmb - $src2 is a broadcast scalar load (_.BroadcastLdFrag); sets EVEX_B and
//         appends _.BroadcastStr to the memory operand in the asm string.
12120multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12121  def rr : I<0x68, MRMSrcReg,
12122                  (outs _.KRPC:$dst),
12123                  (ins _.RC:$src1, _.RC:$src2),
12124                  !strconcat("vp2intersect", _.Suffix,
12125                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12126                  [(set _.KRPC:$dst, (X86vp2intersect
12127                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12128                  EVEX_4V, T8XD, Sched<[sched]>;
12129
12130  def rm : I<0x68, MRMSrcMem,
12131                  (outs _.KRPC:$dst),
12132                  (ins  _.RC:$src1, _.MemOp:$src2),
12133                  !strconcat("vp2intersect", _.Suffix,
12134                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12135                  [(set _.KRPC:$dst, (X86vp2intersect
12136                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12137                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12138                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12139
12140  def rmb : I<0x68, MRMSrcMem,
12141                  (outs _.KRPC:$dst),
12142                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12143                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12144                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12145                  [(set _.KRPC:$dst, (X86vp2intersect
12146                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12147                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12148                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12149}
12150
// Instantiates avx512_vp2intersect_modes at the three widths described by `_`:
//   Z    - _.info512, EVEX_V512, requires HasAVX512 and HasVP2INTERSECT.
//   Z256 - _.info256, EVEX_V256, additionally requires HasVLX.
//   Z128 - _.info128, EVEX_V128, additionally requires HasVLX.
12151multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12152  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12153    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12154
12155  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12156    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12157    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12158  }
12159}
12160
// VP2INTERSECT over i32 (D) and i64 (Q) element vectors, both scheduled with
// SchedWriteVecALU. The Q form additionally carries VEX_W.
12161defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12162defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12163
// Instantiates avx512_binop_rm2 at 512/256/128 bits for a binary operation
// whose source and destination vector types differ.
//   _SrcVTInfo - per-width source vector info; passed as both the first and
//                the third vector-info argument of avx512_binop_rm2.
//   _DstVTInfo - per-width destination vector info (second vector-info
//                argument).
//   prd        - feature predicate; the 256/128-bit forms also require HasVLX.
//   IsCommutable - forwarded unchanged; defaults to 0.
// Every width is tagged EVEX_CD8<32, CD8VF>, i.e. the element size is fixed at
// 32 regardless of the vector infos supplied.
12164multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12165                             X86SchedWriteWidths sched,
12166                             AVX512VLVectorVTInfo _SrcVTInfo,
12167                             AVX512VLVectorVTInfo _DstVTInfo,
12168                             SDNode OpNode, Predicate prd,
12169                             bit IsCommutable = 0> {
12170  let Predicates = [prd] in
12171    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12172                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12173                                   _SrcVTInfo.info512, IsCommutable>,
12174                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12175  let Predicates = [HasVLX, prd] in {
12176    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12177                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12178                                      _SrcVTInfo.info256, IsCommutable>,
12179                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12180    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12181                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12182                                      _SrcVTInfo.info128, IsCommutable>,
12183                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12184  }
12185}
12186
// vcvtne2ps2bf16: opcode 0x72, f32 source vectors and i16 destination vectors,
// selected from X86cvtne2ps2bf16 under HasBF16, explicitly non-commutable (0).
12187defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12188                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12189                                        avx512vl_f32_info, avx512vl_i16_info,
12190                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12191
12192// Truncate Float to BFloat16
// Instantiates avx512_vcvt_fp for the f32 -> bf16 conversion at three widths.
// All three are defined with an empty Uses list and mayRaiseFPException = 0.
//   Z    - v16f32 source, v16i16 result, X86cvtneps2bf16, requires HasBF16.
//   Z256 - v8f32 source, v8i16 result, X86cvtneps2bf16, broadcast "{1to8}",
//          alias suffix "{y}"; requires HasBF16 and HasVLX.
//   Z128 - v4f32 source, v8i16 result, defined with null_frag (no instruction
//          pattern), broadcast "{1to4}", alias suffix "{x}", f128mem operand
//          and VK4WM write-mask class; requires HasBF16 and HasVLX.
// The trailing InstAliases accept the mnemonic with an explicit "x" or "y"
// suffix for the 128-bit and 256-bit source forms; the memory-operand aliases
// are restricted to the "intel" assembler variant.
12193multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12194                             X86SchedWriteWidths sched> {
12195  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12196    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12197                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12198  }
12199  let Predicates = [HasBF16, HasVLX] in {
12200    let Uses = []<Register>, mayRaiseFPException = 0 in {
12201    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12202                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12203                               VK4WM>, EVEX_V128;
12204    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12205                               X86cvtneps2bf16,
12206                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12207    }
12208
    // Both widths write a VR128X destination, hence the "x"/"y" suffixed
    // spellings that name the source width explicitly.
12209    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12210                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12211                    VR128X:$src), 0>;
12212    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12213                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12214                    f128mem:$src), 0, "intel">;
12215    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12216                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12217                    VR256X:$src), 0>;
12218    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12219                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12220                    f256mem:$src), 0, "intel">;
12221  }
12222}
12223
// vcvtneps2bf16: opcode 0x72, scheduled with SchedWriteCvtPD2PS, with T8XS and
// EVEX_CD8<32, CD8VF> applied to all widths.
12224defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12225                                       SchedWriteCvtPD2PS>, T8XS,
12226                                       EVEX_CD8<32, CD8VF>;
12227
// Explicit selection patterns for the 128-bit VCVTNEPS2BF16 forms. For each of
// the three source kinds (register, full-vector load, 32-bit broadcast load)
// there are three patterns: unmasked (X86cvtneps2bf16), merge-masked
// (X86mcvtneps2bf16 with a VR128X pass-through $src0) and zero-masked
// (X86mcvtneps2bf16 with an all-zeros pass-through).
12228let Predicates = [HasBF16, HasVLX] in {
12229  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12230  // patterns have been disabled with null_frag.
  // Register source.
12231  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12232            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12233  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12234                              VK4WM:$mask),
12235            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12236  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12237                              VK4WM:$mask),
12238            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12239
  // Full-vector load source.
12240  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12241            (VCVTNEPS2BF16Z128rm addr:$src)>;
12242  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12243                              VK4WM:$mask),
12244            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12245  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12246                              VK4WM:$mask),
12247            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12248
  // 32-bit broadcast load source.
12249  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12250                                     (X86VBroadcastld32 addr:$src)))),
12251            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12252  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12253                              (v8i16 VR128X:$src0), VK4WM:$mask),
12254            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12255  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12256                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12257            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12258}
12259
// Defines the register (r), memory (m) and broadcast-memory (mb) forms of a
// three-source operation whose first source $src1 is tied to $dst (via the
// enclosing Constraints).
//   _     - vector info for the result, $src1 and the register operands.
//   src_v - vector info used to type the loaded third operand in the m and mb
//           patterns (src_v.VT, src_v.LdFrag, src_v.BroadcastLdFrag).
// Note that the mb asm strings use _.BroadcastStr, not src_v.BroadcastStr.
12260let Constraints = "$src1 = $dst" in {
12261multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12262                              X86FoldableSchedWrite sched,
12263                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12264  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12265                           (ins _.RC:$src2, _.RC:$src3),
12266                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12267                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
12268                           EVEX_4V, Sched<[sched]>;
12269
12270  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12271                               (ins _.RC:$src2, _.MemOp:$src3),
12272                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12273                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12274                               (src_v.VT (bitconvert
12275                               (src_v.LdFrag addr:$src3)))))>, EVEX_4V,
12276                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12277
12278  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12279                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
12280                  OpcodeStr,
12281                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12282                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12283                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12284                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12285                  EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12286
12287}
12288} // Constraints = "$src1 = $dst"
12289
// Instantiates avx512_dpbf16ps_rm at three widths, pairing the matching-width
// entries of `_` (result/accumulator info) and src_v (loaded-operand info):
//   Z    - info512, EVEX_V512, requires prd.
//   Z256 - info256, EVEX_V256, requires HasVLX and prd.
//   Z128 - info128, EVEX_V128, requires HasVLX and prd.
12290multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12291                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12292                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12293  let Predicates = [prd] in {
12294    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12295                                   src_v.info512>, EVEX_V512;
12296  }
12297  let Predicates = [HasVLX, prd] in {
12298    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12299                                   src_v.info256>, EVEX_V256;
12300    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12301                                   src_v.info128>, EVEX_V128;
12302  }
12303}
12304
// vdpbf16ps: opcode 0x52, X86dpbf16ps node, SchedWriteFMA, f32 result vectors
// with the loaded third operand typed through the i32 vector infos; requires
// HasBF16. T8XS and EVEX_CD8<32, CD8VF> apply to all widths.
12305defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12306                                       avx512vl_f32_info, avx512vl_i32_info,
12307                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12308