// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 6966ac055c3b7a39266fb982493330df7a097997)
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Textual name of the element type, e.g. "f32" or "i64".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Complex pattern for a scalar FP load used by scalar intrinsics;
  // only defined for f32/f64 element types.
  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index for extracting this vector from a wider register;
  // only defined for 128-bit and 256-bit vectors.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain (single/double/int) derived from the element type.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // Scalar FP register class matching the element type (f32 -> FR32X).
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // An all-zeros vector of this VT, used as the fallback in zero-masking
  // patterns.
  dag ImmAllZerosV = (VT immAllZerosV);

  // Suffix appended to instruction names to select the vector length.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

// 512-bit vector types (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

// Bundles the 512-, 256- and 128-bit X86VectorVTInfo records for one element
// type so a single template argument can drive all three vector lengths.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

// Per-element-type bundles of the 512/256/128-bit info records above.
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

// Mask-register analogue of X86VectorVTInfo: bundles a mask register class,
// its write-mask variant and the corresponding vNi1 value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

// Mask-register info records for each supported mask width.
def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  // Unmasked form.
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Merge-masking form: "$dst {${mask}}".
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking form: "$dst {${mask}} {z}".
  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three set-patterns (unmasked, merge-masked, zero-masked) from
// RHS/MaskingRHS and forwards them to AVX512_maskable_custom.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Uses X86selects (scalar select) instead of vselect for the mask.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          // MaskOnly disables the unmasked pattern (null_frag).
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;

// Scalar variant of AVX512_maskable_3src: same tied-$src1 handling, but the
// masking select node is X86selects instead of vselect.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;

// Assembly-only masking variants: the masked and zero-masked forms get empty
// pattern lists; only the unmasked Pattern is provided by the caller.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

// Assembly-only masking variants with an explicit $src1 operand prepended to
// each ins list (3src form); no masked/zero-masked ISel patterns.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    // Unmasked form.
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    // Masked form.  There is no {z} variant here since the destination is
    // already a mask register.
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

// Builds the mask-register (KRC) destination set-patterns and forwards them
// to AVX512_maskable_custom_cmp.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// Compare-style instruction: the masked result is (and $mask, RHS_su), i.e.
// the write-mask clears result bits whose mask bit is zero.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;


// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// All-zeros 512-bit vector.
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
// All-ones 512-bit vector.
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// Sign-extend a 16-bit k-mask into 32-bit vector elements (0 or -1).
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
// Sign-extend an 8-bit k-mask into 64-bit vector elements (0 or -1).
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

// 128-bit and 256-bit all-zeros pseudos, handled the same way as
// AVX512_512_SET0 above (expanded post-RA, rematerializable).
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  // Scalar +0.0 materialization pseudos (f32 / f64).
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register-register form.
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Register-memory form: the inserted subvector is loaded from memory.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Emits codegen-only Pat<>s that select an already-defined VINSERT
// instruction (InstrStr # "rr" / "rm") for additional vector types.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Register source.
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory source.
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates the 32x4/64x4/64x2/32x8 VINSERT size variants for one
// float/int element-type pair, under the appropriate VLX/DQI predicates.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Additional patterns for handling a bitcast between the vselect and the
// insert_subvector: the insert is performed in the To/From types while the
// mask select is applied in the Cast type (same total bit width), so the
// mask register class and passthru come from Cast.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masked, register source (rrk): select the bitcast insert result
  // against Cast.RC:$src0 under Cast.KRCWM:$mask.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masked, memory source (rmk): the inserted subvector is loaded
  // (with an extra bitconvert around the load fragment).
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masked, register source (rrkz).
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, memory source (rmkz). NOTE(review): unlike the rmk pattern
  // above, the load fragment here is not wrapped in a bitconvert — presumably
  // both spellings canonicalize the same way; confirm before unifying.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
648
// Masked insert patterns where the vselect type (Cast) differs from the
// instruction's native insert type. Grouped by destination width.

// Insert VEC128 into VEC256 (VLX). Note the integer groups below reuse the
// VINSERTF64x2 form for i32/i16/i8 data when DQI is available.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Insert VEC128 into VEC512.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// Insert VEC256 into VEC512.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
726
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
// Register-register form. NOTE(review): marked commutable although the
// insertps immediate encodes distinct source/destination lanes — presumably
// the target's commuting logic rewrites the immediate; confirm.
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: folds a scalar f32 load (scalar_to_vector) as the inserted
// element; EVEX_CD8<32, CD8VT1> gives the compressed disp8 scaling.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
744
745//===----------------------------------------------------------------------===//
746// AVX-512 VECTOR EXTRACT
747//---
748
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one. (Passing null_frag for the unmasked
// operator leaves the unmasked form pattern-less, so it is only selected for
// masked operations — see the DQI instantiations in vextract_for_type.)
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form; AVX512_maskable_split creates unmasked/merge/zero-masked
    // variants, using the two (possibly different) pattern operators.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Extract-to-memory form (store of the extracted subvector).
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked extract-to-memory form; no ISel pattern, selected manually.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
785
// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper over vextract_for_size_split for instructions that
// should be selected for both masked and unmasked extracts.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
792
// Codegen pattern for the alternative types
// Maps an extract of a From-typed vector to an existing instruction (named
// by InstrStr) whose native element type differs, covering both the
// register (rr) and store (mr) forms.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     // Register form: result is the To-typed subvector.
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Store form: extracted subvector is stored directly to memory.
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
808
// Instantiates the whole VEXTRACT family for one FP or integer element pair:
// Opcode128 extracts a 128-bit subvector (32x4 / 64x2 forms) and Opcode256
// extracts a 256-bit subvector (64x4 / 32x8 forms), across the legal
// source widths and feature predicates.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag as the unmasked operator means no unmasked pattern is built.)
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
853
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Instantiate the FP (0x19/0x1b) and integer (0x39/0x3b) subvector-extract
// instruction families.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// Reuse the 32x4 extracts for 64-bit-element results (v2f64/v2i64 from 512).
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Reuse the 64x4 extracts for 32-bit-element results (v8f32/v8i32 from 512).
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 256-bit sources with VLX.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
891
892
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// The left-hand subvector index is in elements (2 x i64, 4 x i32, 8 x i16,
// 16 x i8 — all bit offset 128); the emitted AVX2 VEXTRACT*128rr takes the
// low YMM via EXTRACT_SUBREG and uses immediate 1 (upper half of the YMM).
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
921
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Same trick as the NoVLX block above, but with VLX the 256-bit
// VEXTRACT*32x4Z256rr forms are available instead of the AVX2 ones.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
950
951
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector. The extract is done in the To/From types while the
// mask select happens in the Cast type; only register forms are covered.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masked form (rrk). NOTE(review): the passthru is written as
  // To.RC:$src0 here but fed to the instruction as Cast.RC:$src0 —
  // presumably To.RC and Cast.RC are always the same register class
  // (same vector width); confirm before changing either spelling.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masked form (rrkz).
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
979
// Masked extract patterns where the vselect type (Cast) differs from the
// instruction's native extract type. Grouped by source width.

// Extract VEC128 from VEC256 (VLX).
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// Extract VEC128 from VEC512.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// Extract VEC256 from VEC512.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1057
// vextractps - extract 32 bits from XMM
// Register form: extracts element $src2 of the source (viewed as v4i32 via
// bc_v4i32) into a 32-bit GPR.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

// Store form: the extracted 32-bit element goes straight to memory.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1071
1072//===---------------------------------------------------------------------===//
1073// AVX-512 BROADCAST
1074//---
// broadcast with a scalar argument.
// Maps X86VBroadcast of an FP scalar register (FRC) onto the existing
// vector-source broadcast instruction (Name # ZSuffix) by first copying the
// scalar into the vector register class. Covers unmasked (r), merge-masked
// (rk) and zero-masked (rkz) forms. Note: opc and OpcodeStr are not
// referenced in this body.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            string Name,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
1094
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// MaskInfo describes the type the masking (and the instruction result) is
// expressed in; DestInfo is the broadcast's own result type, glued to
// MaskInfo with a bitconvert. UnmaskedOp lets the unmasked pattern use a
// different node than X86VBroadcast (default is the same node).
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register-source broadcast (r).
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                   T8PD, EVEX, Sched<[SchedRR]>;
  // Memory-source broadcast (m): broadcasts a scalar load.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold a scalar_to_vector of a scalar load into the memory broadcast.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  // Merge-masked version of the fold above.
  // NOTE(review): this one names the instruction via DestInfo.ZSuffix while
  // the other two use MaskInfo.ZSuffix — presumably the suffixes always
  // coincide (bitconvert forces equal vector widths); confirm.
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  // Zero-masked version of the fold.
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
1158
// Helper class to force mask and broadcast result to same type.
// Instantiates avx512_broadcast_rm_split with DestInfo passed for both the
// MaskInfo and DestInfo arguments, i.e. the writemask type always matches the
// destination vector type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
1166
// FP scalar broadcast, vbroadcastsd flavor: broadcasts a 128-bit-source
// element to 512 bits under AVX512F (Z) and to 256 bits under VLX (Z256).
// Note that no Z128 variant is defined here (unlike the _ss version).
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}
1185
// FP scalar broadcast, vbroadcastss flavor: broadcasts a 128-bit-source
// element to 512 bits under AVX512F (Z) and to 256/128 bits under VLX
// (Z256/Z128).
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
// Instantiate the FP broadcasts. VBROADCASTSD carries VEX_W1X for its
// 64-bit element width.
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;
1213
// Integer broadcast from a general-purpose register into every vector
// element. Masked variants (rk/rkz) are supplied by AVX512_maskable.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins SrcRC:$src),
                         "vpbroadcast"##_.Suffix, "$src", "$src",
                         (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                         Sched<[SchedRR]>;
}
1224
// Byte/word GPR broadcast. The instruction itself takes a GR32 source (see
// the (ins GR32:$src) operand lists), so no ISel patterns are attached to it
// (the pattern lists are [], [], []). Instead, the explicit Pats below match
// a broadcast of the small register and widen it to GR32 via INSERT_SUBREG
// into an IMPLICIT_DEF, selecting the r/rk/rkz instruction by name.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked broadcast.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked broadcast ($src0 passthrough).
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked broadcast.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1248
// Vector-length wrapper for the byte/word GPR broadcast: 512-bit under prd,
// 256/128-bit additionally under VLX. Name#Z/Z256/Z128 is forwarded so the
// inner Pats can !cast the concrete per-width instructions.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1262
// Vector-length wrapper for the dword/qword GPR broadcast: 512-bit under prd,
// 256/128-bit additionally under VLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1276
// GPR-source broadcasts. B/W require BWI; D/Q only need AVX512F. D and Q
// share opcode 0x7C — the VEX_W bit selects the quadword form.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1286
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
// Matches a broadcast whose source is a (possibly wider) vector register and
// selects the register form of the instruction after extracting the low xmm
// subvector (ExtInfo.VT) with EXTRACT_SUBREG.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
                (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}
1297
// Vector-length wrapper for the xmm/memory-source integer broadcasts, plus
// the broadcast-from-wider-register lowering patterns for each width.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
                                  EVEX_V512;
    // Defined separately to avoid redefinition.
    // (Covers the 512-bit-source case in addition to the 256-bit-source one
    // attached to Z above.)
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}
1318
// xmm/memory-source integer broadcasts. B/W require BWI; D/Q only AVX512F.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512>, VEX_W1X;
1327
// Subvector broadcast from memory (X86SubVBroadcast of a _Src-sized load
// replicated across the _Dst vector), with masked variants from
// AVX512_maskable. Memory-only: there is no register form.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1337
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested. (The unmasked pattern slot is null_frag; only the masked
// pattern is provided to AVX512_maskable_split.)
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1352
// Extra load-folding patterns for the element broadcasts: zero-extending
// i64 loads and truncated-i32 i16 loads that the generic patterns miss.
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
let Predicates = [HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
  def : Pat<(v32i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZm addr:$src)>;
}
1398
1399//===----------------------------------------------------------------------===//
1400// AVX-512 BROADCAST SUBVECTORS
1401//
1402
1403defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1404                       v16i32_info, v4i32x_info>,
1405                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1406defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1407                       v16f32_info, v4f32x_info>,
1408                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1409defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1410                       v8i64_info, v4i64x_info>, VEX_W,
1411                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1412defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1413                       v8f64_info, v4f64x_info>, VEX_W,
1414                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1415
let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector broadcasts of other element types are lowered
// with the 64x4 instructions; without masking the element interpretation is
// irrelevant (the 32x8 forms would need DQI, defined further below).
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The broadcast is then emitted as a vinsert of the register into both
// halves of a 512-bit register.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// Unmasked 128-bit subvector broadcasts of other element types reuse the
// 32x4 instructions (64x2 would need DQI).
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1491
let Predicates = [HasVLX] in {
// 256-bit destination forms of the 32x4 subvector broadcasts.
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Unmasked broadcasts of other element types reuse the 32x4 instructions.
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The broadcast is then emitted as a vinsert of the register into both
// halves of a 256-bit register.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
1549
let Predicates = [HasVLX, HasDQI] in {
// DQ-only 64x2 subvector broadcasts, 256-bit destination.
// NOTE: despite the Z128 suffix in their names, these carry EVEX_V256 and
// produce 256-bit (v4i64/v4f64) results.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1576
let Predicates = [HasDQI] in {
// DQ-only 512-bit subvector broadcasts: 64x2 and 32x8 forms. Defined with
// the _dq multiclass, so only masked patterns are attached (unmasked cases
// are handled by the AVX512F instructions above).
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1626
// DQ 32x2 broadcast: the destination mask/result type comes from _Dst while
// the source register/load types come from _Src, hence the full _split
// multiclass with null_frag as the unmasked operation. 512- and 256-bit
// forms only; the 128-bit form is added by the i32x2 subclass.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}
1640
// Integer 32x2 broadcast: extends the common multiclass with the 128-bit
// form (only the integer variant has one).
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}
1651
// 32x2 broadcasts: i32 result from an i64-typed (two-dword) source.
defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;
1656
// Broadcast of element 0 of a vector register: extract the low xmm
// subregister and use the register form of VBROADCASTSS/SD.
let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}

def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
1673
1674//===----------------------------------------------------------------------===//
1675// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1676//---
1677multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1678                                  X86VectorVTInfo _, RegisterClass KRC> {
1679  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1680                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1681                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1682                  EVEX, Sched<[WriteShuffle]>;
1683}
1684
// Vector-length wrapper for the mask-to-vector broadcast: 512-bit under CDI,
// 256/128-bit additionally under VLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
1694
// Mask-to-vector broadcasts: 16-bit mask to dwords, 8-bit mask to qwords.
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               avx512vl_i64_info, VK8>, VEX_W;
1699
1700//===----------------------------------------------------------------------===//
1701// -- VPERMI2 - 3 source operands form --
1702multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1703                         X86FoldableSchedWrite sched,
1704                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1705let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1706    hasSideEffects = 0 in {
1707  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1708          (ins _.RC:$src2, _.RC:$src3),
1709          OpcodeStr, "$src3, $src2", "$src2, $src3",
1710          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1711          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1712
1713  let mayLoad = 1 in
1714  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1715            (ins _.RC:$src2, _.MemOp:$src3),
1716            OpcodeStr, "$src3, $src2", "$src2, $src3",
1717            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1718                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1719            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1720  }
1721}
1722
// VPERMI2 broadcast-load form (EVEX.b set via EVEX_B): the third operand is a
// scalar memory location broadcast to all elements, printed with the
// "{1toN}" suffix from _.BroadcastStr.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
               IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1737
// Instantiates VPERMI2 for all three vector widths, with register, memory
// and broadcast forms.  512-bit is unconditional here (callers gate it with
// their own predicates); 256/128-bit additionally require VLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}
1757
// Byte/word VPERMI2 variants: gated on an extra feature predicate Prd
// (HasBWI / HasVBMI at the instantiation sites) and, unlike the dword/qword
// helper above, no broadcast (rmb) form is instantiated.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>,  EVEX_V256;
  }
}
1773
// Concrete VPERMI2 instructions.  Integer D/Q and FP PS/PD share opcodes
// 0x76/0x77; W/B use 0x75 and require BWI/VBMI respectively.  The FP
// variants pair an FP data type with an integer index type of matching
// element size.  VEX_W selects the 64-bit/word-sized encodings.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1788
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
//
// Each pattern matches a masked X86VPermt2 whose index operand (and vselect
// passthru) was bitcast from CastVT, and selects the corresponding masked
// instruction form: rrk (register), rmk (full load), rmbk (broadcast load).
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 (_.VT _.RC:$src2),
                                         (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                             (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 _.RC:$src2,
                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                         (_.LdFrag addr:$src3)),
                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86VPermt2 _.RC:$src2,
                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                         (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}
1815
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS variants get the extra vXi64-cast patterns here.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1820
// VPERMT2
// Unlike VPERMI2 above, the operand tied to $dst ($src1) is a *data* source
// here, and the index vector is the separate input $src2 (type IdxVT).
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // Register form.
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Full-vector memory form for the second data operand.
  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (_.LdFrag addr:$src3))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// VPERMT2 broadcast-load form (EVEX_B): $src3 is a scalar memory operand
// broadcast to all elements, printed with _.BroadcastStr.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1853
// Instantiates VPERMT2 for all three vector widths, with register, memory
// and broadcast forms; 256/128-bit forms require VLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>, EVEX_V256;
  }
}
1873
// Byte/word VPERMT2 variants: gated on Prd (HasBWI / HasVBMI at the
// instantiation sites); no broadcast (rmb) form is instantiated.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
1888
// Concrete VPERMT2 instructions, mirroring the VPERMI2 set above:
// D/Q use opcode 0x7E, W/B 0x7D (BWI/VBMI required), PS/PD 0x7F.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1903
1904//===----------------------------------------------------------------------===//
1905// AVX-512 - BLEND using mask
1906//
1907
// Blend-with-mask instruction definitions (despite the name, this is the
// instruction multiclass, not a sched class): unmasked (rr/rm), merge-masked
// (rrk/rmk) and zero-masked (rrkz/rmkz) register and memory forms.  All
// pattern lists are empty ([]) and hasSideEffects = 0 — no ISel patterns are
// attached in this multiclass.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
// Broadcast-memory forms of the mask blends (EVEX_B): masked (rmbk),
// zero-masked (rmbkz) and unmasked (rmb).  As above, no ISel patterns.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
1976
// Dword/qword-element blends: all widths, including the broadcast (rmb*)
// forms; 256/128-bit require VLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                                 EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                      EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                      EVEX_V128;
  }
}
1992
// Byte/word-element blends: require BWI (plus VLX for 256/128-bit) and have
// no broadcast forms.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                               EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                  EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                  EVEX_V128;
  }
}
2006
// Concrete blend-with-mask instructions.  FP blends use opcode 0x65, integer
// dword/qword 0x64, byte/word 0x66; VEX_W selects the 64-bit/word encodings.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
2019
2020//===----------------------------------------------------------------------===//
2021// Compare Instructions
2022//===----------------------------------------------------------------------===//
2023
2024// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2025
// Scalar FP compares producing a mask register (opcode 0xC2, immediate
// predicate $cc).  The _Int forms operate on vector register classes
// (register, memory, and {sae} variants); the isCodeGenOnly forms operate on
// plain scalar FP register classes for ordinary setcc selection.  The *_su
// fragments restrict the masked variants to single-use compare nodes.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
  let mayLoad = 1 in
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Suppress-all-exceptions ({sae}) variant, encoded with EVEX_B.
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                imm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   imm:$cc)>,
                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  // Scalar-register (FRC) forms for plain compares; not exposed to the
  // assembler/disassembler (isCodeGenOnly).
  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))]>,
                EVEX_4V, VEX_LIG, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vcmp", _.Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))]>,
              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2082
// Single-use-restricted variants of the scalar compare nodes: these match
// only when the compare has exactly one use.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
2091
// Concrete scalar compares: VCMPSS (f32, XS prefix) and VCMPSD (f64, XD
// prefix + VEX_W), both available with baseline AVX512F.
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
2102
// Packed integer compares writing a mask register: unmasked rr/rm and masked
// rrk/rmk forms.  The masked patterns AND the incoming writemask with the
// compare result, using the single-use fragment OpNode_su.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              PatFrag OpNode_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
             EVEX_4V, Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2))))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode_su (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
2135
// Extends avx512_icmp_packed (via inheritance) with broadcast-memory forms
// rmb/rmbk (EVEX_B): the second operand is a scalar load broadcast to all
// elements.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  PatFrag OpNode_su,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode_su (_.VT _.RC:$src1),
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}
2161
// Instantiates the non-broadcast integer compares for all vector widths:
// 512-bit under prd, 256/128-bit under prd + VLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 PatFrag OpNode_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2177
// Same width expansion as avx512_icmp_packed_vl, but via the broadcast-
// capable multiclass (used by the dword/qword compares below).
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, PatFrag OpNode_su,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2194
// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// setcc with SETEQ, commutable (see above); and setcc with SETGT.
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// Single-use-restricted versions, used by the masked compare patterns.
def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86pcmpeqm_c node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
                            (X86pcmpgtm node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
2211
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
//
// Concrete VPCMPEQ/VPCMPGT instructions.  B/W element sizes need BWI and use
// the non-broadcast multiclass; D/Q use the broadcast-capable one.  Only the
// EQ compares are commutable (IsCommutable = 1).
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
2248
// Integer compares with an explicit immediate predicate $cc (VPCMP family):
// unmasked rri/rmi and masked rrik/rmik forms (masked patterns AND the
// writemask with the compare result via the single-use Frag_su).  The two
// trailing Pats handle a load in the *first* operand: CommFrag matches the
// swapped compare and CommFrag.OperandTransform rewrites the predicate
// immediate accordingly before selecting the rmi/rmik form.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Load folded into the first operand: select rmi with the predicate
  // transformed for the swapped operand order.
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Masked version of the same commuted-load case.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2310
// Extends avx512_icmp_cc with embedded-broadcast memory forms (rmib /
// rmibk) for the element-broadcast variants of VPCMP{D,Q}[U], plus the
// matching commuted-broadcast load-folding patterns.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  // Broadcast-memory form: scalar load broadcast to all elements (EVEX.b).
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2)),
                                       cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Masked broadcast-memory form.
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc
                                             (_.VT _.RC:$src1),
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Fold a broadcast appearing as the first operand by commuting and
  // swapping the condition-code immediate.
  def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Same commuted broadcast fold, under a writemask.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2356
// Instantiates avx512_icmp_cc for all three vector lengths: the 512-bit
// form requires only `prd`; the 256/128-bit forms additionally require VLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}
2372
// Same length expansion as avx512_icmp_cc_vl, but using the broadcast-
// capable avx512_icmp_cc_rmb (for the 32/64-bit element compares).
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}
2388
// Transform mapping an ISD condition code (operand 2 of a setcc) to the
// corresponding VPCMP immediate.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;
2402
// setcc fragment matching SIGNED integer comparisons; the attached
// OperandTransform converts the condition code to a VPCMP immediate.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Single-use variant, for masked forms where the compare result must not
// have other users.
def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// Single-use, commuted-immediate variant (masked load folding).
def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
2427
// setcc fragment matching UNSIGNED integer comparisons (selects VPCMPU*);
// mirrors the X86pcmpm family above.
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Single-use variant, for masked forms.
def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// Single-use, commuted-immediate variant (masked load folding).
def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
2452
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Byte/word compares (BWI); no broadcast form exists for 8/16-bit elements.
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

// Dword/qword compares (base AVX512F); these use the broadcast-capable
// _rmb_vl multiclass.
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2489
// Single-use wrappers for the FP compare nodes, used in the masked VCMP
// patterns so the unmasked result has no other users.
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
2498
// FP vector compare (VCMPPS/VCMPPD) for one vector width: reg/reg,
// reg/mem and broadcast forms (masked variants come from
// AVX512_maskable_cmp), plus patterns that commute a load/broadcast in the
// first operand when the CC is commutable.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  // Register-register form; commutable (1) since $cc selects the relation.
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
                   "vcmp"#_.Suffix,
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                   1>, Sched<[sched]>;

  // Register-memory form (full-width load in operand 2).
  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                         imm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                            imm:$cc)>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast-memory form (scalar load broadcast, EVEX.b).
  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                        imm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            imm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Patterns for selecting with loads in other operand.
  // CommutableCMPCC restricts these to condition codes that are unchanged
  // by swapping the operands, so the immediate is reused as-is.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)),
                                            (_.VT _.RC:$src1),
                                            CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}
2559
// Register-register FP compare with {sae} (suppress-all-exceptions),
// encoded via EVEX.b on the reg-reg form.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...]
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1",
                     "$src1, $src2, {sae}, $cc",
                     (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
                     (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                    imm:$cc)>,
                     EVEX_B, Sched<[sched]>;
}
2572
// Expands the FP compare to all vector lengths; {sae} exists only on the
// 512-bit form, and the 128/256-bit forms additionally require VLX.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
2584
// Packed FP compares producing a mask register.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with load as first operand.
// Restricted to CommutableCMPCC so the immediate need not be swapped.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
2600
// ----------------------------------------------------------------
// FPClass

// Single-use wrappers for the scalar/vector VFPCLASS nodes, used by the
// masked patterns below.
def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
2613
//handle fpclass instruction  mask =  op(reg_scalar,imm)
//                                    op(mem_scalar,imm)
// Scalar VFPCLASSSS/VFPCLASSSD: classify one scalar FP value against the
// category bitmask in the i32u8 immediate; result goes to a mask register.
// Forms: rr, rrk (masked), rm, rmk (masked).
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                              (i32 imm:$src2)))]>,
                      Sched<[sched]>;
      // Masked: result ANDed with the writemask (zero-masking of the k-reg).
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
    // Memory form: scalar operand loaded via ScalarIntMemCPat.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss _.ScalarIntMemCPat:$src1,
                                       (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
                        (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
                            (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2652
//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASSPS/VFPCLASSPD for one vector width.  `mem` is the x/y/z
// suffix used to disambiguate the memory form in AT&T assembly; the
// InstAliases at the end also accept that suffix on the register and
// broadcast forms.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))]>,
                      Sched<[sched]>;
  // Masked register form.
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (X86Vfpclass_su (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
  // Full-width memory form; mnemonic carries the {x|y|z} size suffix.
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                  (_.VT (_.LdFrag addr:$src1)),
                                  (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast-memory form (EVEX.b): scalar load broadcast to all elements.
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))]>,
                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}
2733
// Expands vector VFPCLASS to all lengths, passing "z"/"x"/"y" as the
// assembly size suffix; 128/256-bit forms additionally require VLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                      _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}
2748
// Top-level FPCLASS expansion: packed single/double (opcVec) and scalar
// single/double (opcScalar) variants.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, prd>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, prd>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}
2765
// VFPCLASS{PS,PD,SS,SD}; requires AVX512DQ.  Vector opcode 0x66, scalar 0x67.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
                                      HasDQI>, AVX512AIi8Base, EVEX;
2768
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// KMOV{B,W,D,Q} mask-to-mask, load, and store forms.  The kk form is a
// pure register move (no pattern, no side effects); km/mk load and store
// the mask through a plain load/store of the mask value type.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}
2791
// KMOV between a mask register and a GPR, both directions.  No patterns
// here (copies are matched elsewhere), hence hasSideEffects = 0.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}
2804
// KMOVB requires DQI; KMOVW is base AVX512F; KMOVD/KMOVQ require BWI.
// Note KMOVD/KMOVQ use different prefixes for the mask forms (PD/PS +
// VEX_W) than for the GPR forms (XD), matching their encodings.
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2825
// GR from/to mask register
// Bitconverts between iN GPR values and vNi1 mask values.  8/16-bit cases
// go through a 32-bit super-register (INSERT_SUBREG / EXTRACT_SUBREG)
// since k-register moves operate on at least 32 bits of GPR.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of a mask-to-GPR move uses KMOVWrk/KMOVBrk, which write the full
// 32-bit register; anyext can be a plain register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// 32/64-bit masks move whole registers; a bare COPY_TO_REGCLASS suffices.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2855
// Load/store kreg
// With DQI, sub-byte masks are loaded/stored via the 8-bit KMOVB and a
// register-class copy to/from the narrow mask class.
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// Without DQI an 8-bit mask load goes through a zero-extending GPR load;
// a 16-bit mask load uses KMOVW directly.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}
2875
// EXTRACT_VECTOR_ELT from an i1 vector producing an i8 scalar; used below to
// lower element-0 extracts of mask registers.
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  // Lower scalar_to_vector / element-0 extract for a mask class as plain
  // register-class copies, widening i8 sources through a 32-bit GPR.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Insert a single i1 (from a GR8) into an all-zero v16i1: mask the GPR to
  // its low bit, then move it into a mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}
2911
// Mask unary operation
// - KNOT
// One reg-reg unary mask instruction, gated on the given predicate.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}

// Instantiates the B/W/D/Q width variants with the appropriate feature
// predicates: B needs DQI, W is baseline AVX512F, D/Q need BWI.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte masks always go through the 16-bit KNOTW.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2949
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
// One reg-reg-reg binary mask instruction, gated on the given predicate.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched, Predicate prd,
                           bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// B/W/D/Q variants; the W predicate is overridable (used by KADD, which
// requires DQI even for the word form).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
2991
// Lower binary operations on narrow (<=8-bit) mask types to the 16-bit
// KxxxW instruction 'Inst'. VOpNode is the vector form of the operation
// (built with vnot, see vandn/vxnor above); OpNode is the scalar form used
// for the v1i1 case.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // Copy the result back into the register class that matches the pattern's
  // v2i1/v4i1 result type. (These previously said VK1 — a copy-paste slip
  // from the v1i1 pattern above; the VK8 pattern shows the intended shape.)
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
3016
// Instantiate the narrow-mask lowering patterns for each logic op, mapping
// onto the corresponding 16-bit K instruction.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
3022
// Mask unpacking
// KUNPCKxx concatenates two source masks into a double-width mask.
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    // concat_vectors places $src1 in the low half, while the instruction
    // puts its first operand in the high half — hence the swapped operands.
    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3042
// Mask bit testing
// KORTEST/KTEST compare two masks and set EFLAGS; no register result.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// B/W/D/Q variants; W predicate is overridable (KTEST needs DQI for W too).
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3070
// Mask shift
// KSHIFTL/KSHIFTR: shift a mask register by an 8-bit immediate.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 !strconcat(OpcodeStr,
                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
                            [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
                 Sched<[sched]>;
}

// W/B use opc1, D/Q use opc2; B needs DQI, D/Q need BWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3099
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// The narrow operands are widened with INSERT_SUBREG into undef 512-bit
// registers, the ZMM compare runs, and the narrow mask is extracted with a
// register-class copy. (Note: "axv512" in the name is a historical typo kept
// for consistency with existing users.)
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                              string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrr")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;

  // Compare merged with an existing mask (and of compare result and $mask)
  // maps to the zero-masked compare form.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag_su (Narrow.VT Narrow.RC:$src1),
                                      (Narrow.VT Narrow.RC:$src2)))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrrk")
            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;
}
3123
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Same widening trick as above, but for the immediate-condition-code compares
// (VPCMP[U]x); Frag.OperandTransform converts the matched condition into the
// instruction's immediate.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Unmasked compare-with-condition.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Compare merged with an existing mask.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag.OperandTransform $cc)), Narrow.KRC)>;
}
3147
// Same as above, but for fp types which don't use PatFrags.
// The condition code is a plain immediate here (no OperandTransform needed).
multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
                                                string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
// Unmasked FP compare-with-immediate-condition.
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            imm:$cc), Narrow.KRC)>;

// FP compare merged with an existing mask.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (OpNode_su (Narrow.VT Narrow.RC:$src1),
                                      (Narrow.VT Narrow.RC:$src2), imm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           imm:$cc), Narrow.KRC)>;
}
3170
// Instantiate the widening lowerings for 32/64-bit element compares when the
// target has AVX512F but not VLX (so only 512-bit compares exist).
let Predicates = [HasAVX512, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  // Immediate-condition integer compares.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  // FP compares.
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
}
3205
// Same widening lowerings for 8/16-bit element compares (BWI without VLX).
let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  // Immediate-condition byte/word compares.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3235
// Mask setting all 0s or 1s
// Pseudo instructions that materialize an all-zero / all-one mask; expanded
// later, cheap to rematerialize.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}

// W/D/Q pseudos; narrower masks are handled by the patterns below.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3265
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are just register-class copies since the narrow mask
// occupies the low bits of the wide one.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate for every (narrow, wide) mask-class pair.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3301
3302//===----------------------------------------------------------------------===//
3303// AVX-512 - Aligned and unaligned load and store
3304//
3305
// Defines one vector load/move (e.g. VMOVAPS/VMOVDQA) at a single width:
// rr / rrk / rrkz register moves and rm / rmk / rmkz memory loads, plus
// masked-load patterns. NoRMPattern suppresses the plain-load pattern;
// SelectOprr lets callers override the select node used for masking.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  // Plain register-to-register move (no pattern; handled as a COPY).
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked register move: dst = mask ? src : 0.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  // Unmasked load from memory.
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie the pass-through operand $src0 to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect _.KRCWM:$mask,
                          (_.VT (ld_frag addr:$src1)),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  // Zero-masked load: dst = mask ? mem : 0.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // masked_load with undef or zero pass-through -> zero-masked load; with a
  // register pass-through -> merge-masked load.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3372
// Aligned-load variants at 512/256/128 bits; the 256/128-bit forms require
// VLX in addition to the base predicate.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

// Unaligned-load variants at 512/256/128 bits; same VLX gating as above.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3411
// One store instruction at a fixed vector width: the store-form (MRMDestReg)
// register-register encodings, the memory stores, the masked-store pattern,
// and the ".s" assembler aliases that force the store-form encoding.
3412  multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3413                          X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3414                          X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3415                          bit NoMRPattern = 0> {
  // Reversed-encoding reg-reg moves.  CodeGen-only with no patterns; they
  // exist for the disassembler (ForceDisassemble) and as unfolding targets
  // (FoldGenData links each to the load-form instruction of the same name).
3416  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3417  let isMoveReg = 1 in
3418  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3419                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3420                         [], _.ExeDomain>, EVEX,
3421                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3422                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3423  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3424                         (ins _.KRCWM:$mask, _.RC:$src),
3425                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3426                         "${dst} {${mask}}, $src}",
3427                         [], _.ExeDomain>,  EVEX, EVEX_K,
3428                         FoldGenData<BaseName#_.ZSuffix#rrk>,
3429                         Sched<[Sched.RR]>;
3430  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3431                          (ins _.KRCWM:$mask, _.RC:$src),
3432                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3433                          "${dst} {${mask}} {z}, $src}",
3434                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3435                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
3436                          Sched<[Sched.RR]>;
3437  }
3438
  // Plain store to memory; NoMRPattern = 1 drops the selection pattern
  // (used when a shared pattern elsewhere selects a different flavor).
3439  let hasSideEffects = 0, mayStore = 1 in
3440  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3441                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3442                    !if(NoMRPattern, [],
3443                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3444                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3445                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Merge-masked store; matched via the Pat below rather than an
  // instruction pattern.
3446  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3447                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3448              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3449               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3450               NotMemoryFoldable;
3451
  // Select the masked store node to the mrk instruction defined above.
3452  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3453           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3454                                                        _.KRCWM:$mask, _.RC:$src)>;
3455
  // "<mnemonic>.s" assembler aliases: explicitly request the store-form
  // (reversed) encoding for reg-reg moves, including masked variants.
3456  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3457                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3458                   _.RC:$dst, _.RC:$src), 0>;
3459  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3460                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3461                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3462  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3463                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3464                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3465  }
3466
// Instantiates an unaligned vector store at all three EVEX vector lengths,
// using the generic (unaligned) 'store'/'masked_store' fragments.  The
// 512-bit form needs only 'prd'; 256/128-bit forms also require HasVLX.
3467  multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3468                              AVX512VLVectorVTInfo _, Predicate prd,
3469                              X86SchedWriteMoveLSWidths Sched,
3470                              string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3471    let Predicates = [prd] in
3472    defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3473                          masked_store, Sched.ZMM, "",
3474                          NoMRPattern>, EVEX_V512;
3475    let Predicates = [prd, HasVLX] in {
3476      defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3477                               masked_store, Sched.YMM,
3478                               EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3479      defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3480                               masked_store, Sched.XMM, EVEX2VEXOvrd,
3481                               NoMRPattern>, EVEX_V128;
3482    }
3483  }
3484
// Same as avx512_store_vl, but uses the alignment-checking
// 'alignedstore'/'masked_store_aligned' fragments (vmovaps/vmovdqa-style).
3485  multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3486                                    AVX512VLVectorVTInfo _, Predicate prd,
3487                                    X86SchedWriteMoveLSWidths Sched,
3488                                    string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3489    let Predicates = [prd] in
3490    defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3491                          masked_store_aligned, Sched.ZMM, "",
3492                          NoMRPattern>, EVEX_V512;
3493
3494    let Predicates = [prd, HasVLX] in {
3495      defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3496                               masked_store_aligned, Sched.YMM,
3497                               EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3498      defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3499                               masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3500                               NoMRPattern>, EVEX_V128;
3501    }
3502  }
3503
// Floating-point full-vector moves.  Aligned forms (vmovaps/vmovapd) use the
// aligned load/store multiclasses; unaligned forms (vmovups/vmovupd) pass
// null_frag to disable the reg-reg vselect patterns for loads.
3504  defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3505                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3506                 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3507                                        HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3508                 PS, EVEX_CD8<32, CD8VF>;
3509
3510  defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3511                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3512                 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3513                                        HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3514                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3515
3516  defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3517                                SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3518                 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3519                                 SchedWriteFMoveLS, "VMOVUPS">,
3520                                 PS, EVEX_CD8<32, CD8VF>;
3521
3522  defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3523                                SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3524                 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3525                                 SchedWriteFMoveLS, "VMOVUPD">,
3526                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3527
// Integer full-vector moves.  Byte/word forms (VMOVDQU8/16) require HasBWI.
// The NoRMPattern/NoMRPattern = 1 flavors get no load/store patterns here;
// the generic integer load/store patterns further below select the 64-bit
// element variants (VMOVDQA64/VMOVDQU64) for all integer element types.
3528  defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3529                                         HasAVX512, SchedWriteVecMoveLS,
3530                                         "VMOVDQA", 1>,
3531                   avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3532                                          HasAVX512, SchedWriteVecMoveLS,
3533                                          "VMOVDQA", 1>,
3534                   PD, EVEX_CD8<32, CD8VF>;
3535
3536  defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3537                                         HasAVX512, SchedWriteVecMoveLS,
3538                                         "VMOVDQA">,
3539                   avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3540                                          HasAVX512, SchedWriteVecMoveLS,
3541                                          "VMOVDQA">,
3542                   PD, VEX_W, EVEX_CD8<64, CD8VF>;
3543
3544  defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3545                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3546                  avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3547                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3548                  XD, EVEX_CD8<8, CD8VF>;
3549
3550  defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3551                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3552                   avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3553                                   SchedWriteVecMoveLS, "VMOVDQU", 1>,
3554                   XD, VEX_W, EVEX_CD8<16, CD8VF>;
3555
3556  defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3557                                  SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3558                   avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3559                                   SchedWriteVecMoveLS, "VMOVDQU", 1>,
3560                   XS, EVEX_CD8<32, CD8VF>;
3561
3562  defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3563                                  SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3564                   avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3565                                   SchedWriteVecMoveLS, "VMOVDQU">,
3566                   XS, VEX_W, EVEX_CD8<64, CD8VF>;
3567
3568  // Special instructions to help with spilling when we don't have VLX. We need
3569  // to load or store from a ZMM register instead. These are converted in
3570  // expandPostRAPseudos.
  // Load pseudos: 128/256-bit XMM/YMM reloads that will be widened to ZMM
  // loads after register allocation.
3571  let isReMaterializable = 1, canFoldAsLoad = 1,
3572      isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3573  def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3574                              "", []>, Sched<[WriteFLoadX]>;
3575  def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3576                              "", []>, Sched<[WriteFLoadY]>;
3577  def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3578                              "", []>, Sched<[WriteFLoadX]>;
3579  def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3580                              "", []>, Sched<[WriteFLoadY]>;
3581  }
3582
  // Store pseudos: the matching 128/256-bit spill stores.
3583  let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3584  def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3585                              "", []>, Sched<[WriteFStoreX]>;
3586  def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3587                              "", []>, Sched<[WriteFStoreY]>;
3588  def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3589                              "", []>, Sched<[WriteFStoreX]>;
3590  def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3591                              "", []>, Sched<[WriteFStoreY]>;
3592  }
3593
// vselect with the zero vector on the TRUE side: invert the mask with KNOT
// and use a zero-masked register move.  The v8i1 mask must round-trip
// through VK16 because KNOTWrr operates on 16-bit mask registers.
3594  def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3595                            (v8i64 VR512:$src))),
3596     (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3597                                                VK8), VR512:$src)>;
3598
3599  def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3600                             (v16i32 VR512:$src))),
3601                    (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3602
3603  // These patterns exist to prevent the above patterns from introducing a second
3604  // mask inversion when one already exists.
3605  def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3606                            (v8i64 immAllZerosV),
3607                            (v8i64 VR512:$src))),
3608                   (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3609  def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3610                             (v16i32 immAllZerosV),
3611                             (v16i32 VR512:$src))),
3612                    (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3613
// Lowers a narrow (128/256-bit) masked select by widening both operands into
// a wide (512-bit) register via INSERT_SUBREG, performing the masked move on
// the wide instruction, and extracting the narrow result back out.  The
// narrow mask is moved into the wide write-mask class with COPY_TO_REGCLASS.
3614  multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3615                                X86VectorVTInfo Wide> {
  // Merge-masked form: src0 provides the pass-through (merged) elements.
3616   def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3617                                 Narrow.RC:$src1, Narrow.RC:$src0)),
3618             (EXTRACT_SUBREG
3619              (Wide.VT
3620               (!cast<Instruction>(InstrStr#"rrk")
3621                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3622                (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3623                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3624              Narrow.SubRegIdx)>;
3625
  // Zero-masked form: false elements become zero, so no src0 is needed.
3626   def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3627                                 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3628             (EXTRACT_SUBREG
3629              (Wide.VT
3630               (!cast<Instruction>(InstrStr#"rrkz")
3631                (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3632                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3633              Narrow.SubRegIdx)>;
3634  }
3635
3636  // Patterns for handling masked selects of 128-bit and 256-bit vectors when
3637  // VLX isn't available. Widen the select to 512 bits and extract the result.
3638  let Predicates = [HasAVX512, NoVLX] in {
3639    defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3640    defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3641    defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3642    defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3643
3644    defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3645    defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3646    defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3647    defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3648  }
3649
  // Byte/word element selects need the BWI move instructions.
3650  let Predicates = [HasBWI, NoVLX] in {
3651    defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3652    defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3653
3654    defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3655    defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3656  }
3657
// Unmasked 512-bit integer loads/stores of every element type are selected
// to the qword-element instructions (VMOVDQA64Z/VMOVDQU64Z); element size
// is irrelevant for a full-width unmasked move.
3658  let Predicates = [HasAVX512] in {
3659    // 512-bit load.
3660    def : Pat<(alignedloadv16i32 addr:$src),
3661              (VMOVDQA64Zrm addr:$src)>;
3662    def : Pat<(alignedloadv32i16 addr:$src),
3663              (VMOVDQA64Zrm addr:$src)>;
3664    def : Pat<(alignedloadv64i8 addr:$src),
3665              (VMOVDQA64Zrm addr:$src)>;
3666    def : Pat<(loadv16i32 addr:$src),
3667              (VMOVDQU64Zrm addr:$src)>;
3668    def : Pat<(loadv32i16 addr:$src),
3669              (VMOVDQU64Zrm addr:$src)>;
3670    def : Pat<(loadv64i8 addr:$src),
3671              (VMOVDQU64Zrm addr:$src)>;
3672
3673    // 512-bit store.
3674    def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3675              (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3676    def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3677              (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3678    def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3679              (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3680    def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3681              (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3682    def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3683              (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3684    def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3685              (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3686  }
3687
// With VLX, unmasked 128/256-bit integer loads/stores likewise all map onto
// the qword-element VL instructions, regardless of element type.
3688  let Predicates = [HasVLX] in {
3689    // 128-bit load.
3690    def : Pat<(alignedloadv4i32 addr:$src),
3691              (VMOVDQA64Z128rm addr:$src)>;
3692    def : Pat<(alignedloadv8i16 addr:$src),
3693              (VMOVDQA64Z128rm addr:$src)>;
3694    def : Pat<(alignedloadv16i8 addr:$src),
3695              (VMOVDQA64Z128rm addr:$src)>;
3696    def : Pat<(loadv4i32 addr:$src),
3697              (VMOVDQU64Z128rm addr:$src)>;
3698    def : Pat<(loadv8i16 addr:$src),
3699              (VMOVDQU64Z128rm addr:$src)>;
3700    def : Pat<(loadv16i8 addr:$src),
3701              (VMOVDQU64Z128rm addr:$src)>;
3702
3703    // 128-bit store.
3704    def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3705              (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3706    def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3707              (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3708    def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3709              (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3710    def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3711              (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3712    def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3713              (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3714    def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3715              (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3716
3717    // 256-bit load.
3718    def : Pat<(alignedloadv8i32 addr:$src),
3719              (VMOVDQA64Z256rm addr:$src)>;
3720    def : Pat<(alignedloadv16i16 addr:$src),
3721              (VMOVDQA64Z256rm addr:$src)>;
3722    def : Pat<(alignedloadv32i8 addr:$src),
3723              (VMOVDQA64Z256rm addr:$src)>;
3724    def : Pat<(loadv8i32 addr:$src),
3725              (VMOVDQU64Z256rm addr:$src)>;
3726    def : Pat<(loadv16i16 addr:$src),
3727              (VMOVDQU64Z256rm addr:$src)>;
3728    def : Pat<(loadv32i8 addr:$src),
3729              (VMOVDQU64Z256rm addr:$src)>;
3730
3731    // 256-bit store.
3732    def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3733              (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3734    def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3735              (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3736    def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3737              (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3738    def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3739              (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3740    def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3741              (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3742    def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3743              (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3744  }
3745
3746  // Move Int Doubleword to Packed Double Int
3747  //
3748  let ExeDomain = SSEPackedInt in {
  // vmovd: GR32 -> XMM (reg and mem-source forms).
3749  def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3750                        "vmovd\t{$src, $dst|$dst, $src}",
3751                        [(set VR128X:$dst,
3752                          (v4i32 (scalar_to_vector GR32:$src)))]>,
3753                          EVEX, Sched<[WriteVecMoveFromGpr]>;
3754  def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3755                        "vmovd\t{$src, $dst|$dst, $src}",
3756                        [(set VR128X:$dst,
3757                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3758                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
  // vmovq: GR64 -> XMM.
3759  def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3760                        "vmovq\t{$src, $dst|$dst, $src}",
3761                          [(set VR128X:$dst,
3762                            (v2i64 (scalar_to_vector GR64:$src)))]>,
3763                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
  // Disassembler-only memory form; loads are normally matched by
  // VMOVQI2PQIZrm below.
3764  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3765  def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3766                        (ins i64mem:$src),
3767                        "vmovq\t{$src, $dst|$dst, $src}", []>,
3768                        EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
  // Bitcast moves between GR64 and FR64X scalar registers.
3769  let isCodeGenOnly = 1 in {
3770  def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3771                         "vmovq\t{$src, $dst|$dst, $src}",
3772                         [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3773                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3774  def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3775                           "vmovq\t{$src, $dst|$dst, $src}",
3776                           [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3777                           EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3778  }
3779  } // ExeDomain = SSEPackedInt
3780
3781  // Move Int Doubleword to Single Scalar
3782  //
  // Bitcast move GR32 -> FR32X; CodeGen-only (no distinct assembly form).
3783  let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3784  def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3785                        "vmovd\t{$src, $dst|$dst, $src}",
3786                        [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3787                        EVEX, Sched<[WriteVecMoveFromGpr]>;
3788  } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3789
3790  // Move doubleword from xmm register to r/m32
3791  //
3792  let ExeDomain = SSEPackedInt in {
  // vmovd: extract element 0 of a v4i32 to GR32 or store it to memory.
3793  def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3794                         "vmovd\t{$src, $dst|$dst, $src}",
3795                         [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3796                                          (iPTR 0)))]>,
3797                         EVEX, Sched<[WriteVecMoveToGpr]>;
3798  def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3799                         (ins i32mem:$dst, VR128X:$src),
3800                         "vmovd\t{$src, $dst|$dst, $src}",
3801                         [(store (i32 (extractelt (v4i32 VR128X:$src),
3802                                       (iPTR 0))), addr:$dst)]>,
3803                         EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3804  } // ExeDomain = SSEPackedInt
3805
3806  // Move quadword from xmm1 register to r/m64
3807  //
3808  let ExeDomain = SSEPackedInt in {
  // vmovq: extract element 0 of a v2i64 to GR64.
3809  def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3810                        "vmovq\t{$src, $dst|$dst, $src}",
3811                        [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3812                                                     (iPTR 0)))]>,
3813                        PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3814                        Requires<[HasAVX512]>;
3815
  // Disassembler-only 0x7E memory-store form; stores are normally matched
  // by the 0xD6 encoding below.
3816  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3817  def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3818                        "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3819                        EVEX, VEX_W, Sched<[WriteVecStore]>,
3820                        Requires<[HasAVX512, In64BitMode]>;
3821
  // vmovq store of element 0 to memory (0xD6 encoding, used for selection).
3822  def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3823                        (ins i64mem:$dst, VR128X:$src),
3824                        "vmovq\t{$src, $dst|$dst, $src}",
3825                        [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3826                                addr:$dst)]>,
3827                        EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3828                        Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3829
  // Store-form reg-reg vmovq encoding, kept for the disassembler and for
  // the ".s" alias below.
3830  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3831  def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3832                               (ins VR128X:$src),
3833                               "vmovq\t{$src, $dst|$dst, $src}", []>,
3834                               EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3835  } // ExeDomain = SSEPackedInt
3836
3837  def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3838                  (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3839
3840  let Predicates = [HasAVX512] in {
3841    def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3842              (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3843  }
3844
3845  // Move Scalar Single to Double Int
3846  //
  // Bitcast move FR32X -> GR32; CodeGen-only counterpart of VMOVDI2SSZrr.
3847  let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3848  def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3849                        (ins FR32X:$src),
3850                        "vmovd\t{$src, $dst|$dst, $src}",
3851                        [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3852                        EVEX, Sched<[WriteVecMoveToGpr]>;
3853  } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3854
3855  // Move Quadword Int to Packed Quadword Int
3856  //
  // vmovq load: i64 from memory into element 0 of an XMM register.
3857  let ExeDomain = SSEPackedInt in {
3858  def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3859                        (ins i64mem:$src),
3860                        "vmovq\t{$src, $dst|$dst, $src}",
3861                        [(set VR128X:$dst,
3862                          (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3863                        EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3864  } // ExeDomain = SSEPackedInt
3865
3866  // Allow "vmovd" but print "vmovq".
3867  def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3868                  (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3869  def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3870                  (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3871
3872//===----------------------------------------------------------------------===//
3873// AVX-512  MOVSS, MOVSD
3874//===----------------------------------------------------------------------===//
3875
3876multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3877                              X86VectorVTInfo _> {
3878  let Predicates = [HasAVX512, OptForSize] in
3879  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3880             (ins _.RC:$src1, _.RC:$src2),
3881             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3882             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3883             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3884  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3885              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3886              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3887              "$dst {${mask}} {z}, $src1, $src2}"),
3888              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3889                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3890                                      _.ImmAllZerosV)))],
3891              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3892  let Constraints = "$src0 = $dst"  in
3893  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3894             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3895             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3896             "$dst {${mask}}, $src1, $src2}"),
3897             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3898                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3899                                     (_.VT _.RC:$src0))))],
3900             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3901  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3902  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3903             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3904             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3905             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3906  // _alt version uses FR32/FR64 register class.
3907  let isCodeGenOnly = 1 in
3908  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3909                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3910                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3911                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3912  }
3913  let mayLoad = 1, hasSideEffects = 0 in {
3914    let Constraints = "$src0 = $dst" in
3915    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3916               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3917               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3918               "$dst {${mask}}, $src}"),
3919               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3920    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3921               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3922               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3923               "$dst {${mask}} {z}, $src}"),
3924               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3925  }
3926  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3927             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3928             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3929             EVEX, Sched<[WriteFStore]>;
3930  let mayStore = 1, hasSideEffects = 0 in
3931  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3932              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3933              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3934              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3935              NotMemoryFoldable;
3936}
3937
// Instantiate the scalar moves: vmovss (f32, XS prefix) and vmovsd
// (f64, XD prefix + VEX_W).
3938  defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3939                                    VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3940
3941  defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3942                                    VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3943
3944
// Matches a scalar move whose inserted element is itself a masked select of
// two scalar (FRC) values, and folds the select into the masked rrk/rrkz
// form of the move instruction.  Note the operand swap in the output: the
// select's false value ($src2) becomes the instruction's first source.
3945  multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3946                                         PatLeaf ZeroFP, X86VectorVTInfo _> {
3947
  // Merge form: false value is a scalar register.
3948  def : Pat<(_.VT (OpNode _.RC:$src0,
3949                          (_.VT (scalar_to_vector
3950                                    (_.EltVT (X86selects VK1WM:$mask,
3951                                                         (_.EltVT _.FRC:$src1),
3952                                                         (_.EltVT _.FRC:$src2))))))),
3953            (!cast<Instruction>(InstrStr#rrk)
3954                          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3955                          VK1WM:$mask,
3956                          (_.VT _.RC:$src0),
3957                          (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3958
  // Zeroing form: false value is the FP zero immediate.
3959  def : Pat<(_.VT (OpNode _.RC:$src0,
3960                          (_.VT (scalar_to_vector
3961                                    (_.EltVT (X86selects VK1WM:$mask,
3962                                                         (_.EltVT _.FRC:$src1),
3963                                                         (_.EltVT ZeroFP))))))),
3964            (!cast<Instruction>(InstrStr#rrkz)
3965                          VK1WM:$mask,
3966                          (_.VT _.RC:$src0),
3967                          (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3968  }
3969
// Matches a 512-bit masked store whose value is a 128-bit vector inserted
// at element 0, and selects the scalar masked store (mrk).  'Mask' is the
// caller-supplied mask dag; only its low bit reaches VK1WM.
3970  multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3971                                          dag Mask, RegisterClass MaskRC> {
3972
3973  def : Pat<(masked_store
3974               (_.info512.VT (insert_subvector undef,
3975                                 (_.info128.VT _.info128.RC:$src),
3976                                 (iPTR 0))), addr:$dst, Mask),
3977            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3978                        (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3979                        _.info128.RC:$src)>;
3980
3981  }
3982
// Variant of avx512_store_scalar_lowering for masks held in a GPR: the mask
// register is first widened into a 32-bit value via INSERT_SUBREG before
// being copied into the VK1WM write-mask class.
3983  multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3984                                                 AVX512VLVectorVTInfo _,
3985                                                 dag Mask, RegisterClass MaskRC,
3986                                                 SubRegIndex subreg> {
3987
3988  def : Pat<(masked_store
3989               (_.info512.VT (insert_subvector undef,
3990                                 (_.info128.VT _.info128.RC:$src),
3991                                 (iPTR 0))), addr:$dst, Mask),
3992            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3993                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3994                        _.info128.RC:$src)>;
3995
3996  }
3997
3998  // This matches the more recent codegen from clang that avoids emitting a 512
3999  // bit masked store directly. Codegen will widen 128-bit masked store to 512
4000  // bits on AVX512F only targets.
// Provides both the widened-to-512-bit pattern (AVX512F-only targets) and
// the direct 128-bit masked-store pattern (AVX512VL targets), each taking
// its own mask dag but selecting the same scalar masked-store instruction.
4001  multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4002                                                 AVX512VLVectorVTInfo _,
4003                                                 dag Mask512, dag Mask128,
4004                                                 RegisterClass MaskRC,
4005                                                 SubRegIndex subreg> {
4006
4007  // AVX512F pattern.
4008  def : Pat<(masked_store
4009               (_.info512.VT (insert_subvector undef,
4010                                 (_.info128.VT _.info128.RC:$src),
4011                                 (iPTR 0))), addr:$dst, Mask512),
4012            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4013                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4014                        _.info128.RC:$src)>;
4015
4016  // AVX512VL pattern.
4017  def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4018            (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4019                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4020                        _.info128.RC:$src)>;
4021  }
4022
// Lower a masked load that was widened to 512 bits and then narrowed back
// with extract_subvector.  The GPR mask (MaskRC) is copied directly into a
// VK1WM register.  Two forms:
//  - zero passthru (ImmAllZerosV)            -> zero-masked load InstrStr#rmkz
//  - X86vzmovl of $src as the passthru value -> merge-masked load InstrStr#rmk
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// Zero-masked form: elements not selected by the mask become zero.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

// Merge-masked form: passthru is the zero-extended low element of $src.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
4045
// Same as avx512_load_scalar_lowering, but the GPR mask is narrower than i32:
// it is first widened with INSERT_SUBREG at 'subreg' before the copy to VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Zero-masked form (passthru is all zeros).
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Merge-masked form (passthru is X86vzmovl of $src).
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
4070
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
// Mask512 matches the widened form, Mask128 the native 128-bit form (AVX512VL).
// As in the subreg store variant, the GPR mask is widened to i32 with
// INSERT_SUBREG at 'subreg' and copied into VK1WM.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns.
// Zero-masked form.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Merge-masked form.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
// Zero-masked form at native 128-bit width.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Merge-masked form at native 128-bit width.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}
4111
// Instantiate the scalar-move select lowerings for VMOVSS/VMOVSD.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4114
// Instantiate the masked scalar-store lowerings.  f32 uses a 16-element
// (v16i1) mask; f64 uses an 8-element (v8i1) mask.  The mask DAGs 'and' the
// GPR condition with 1 so only the low mask bit is significant.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Newer-clang forms: the i8 condition becomes a v8i1, its low lanes are
// extracted and re-inserted into a zeroed wider mask (Mask512), or used
// directly (Mask128).
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4146
// Instantiate the masked scalar-load lowerings; mask DAGs mirror the store
// instantiations above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Newer-clang forms (see the subreg2 multiclass comment).
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4178
// Lower scalar selects on a 1-bit mask to masked VMOVSS/VMOVSD register
// moves.  The FR32X/FR64X operands are viewed as 128-bit vectors for the
// instruction and the low element is copied back out afterwards.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// Select against +0.0 uses the zero-masked form.
// NOTE(review): the f32 case matches fp32imm0 while the f64 case below
// matches the generic fpimm0 leaf — confirm the asymmetry is intentional.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4198
// Reversed-operand (MRMDestReg, opcode 0x11) encodings of VMOVSS/VMOVSD and
// their masked forms.  These are codegen-only with no patterns; they exist so
// the disassembler can decode the alternate encoding and so FoldGenData can
// pair each one with its normal (0x10) counterpart.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masked form: $src0 supplies the masked-off elements.
  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  // Zero-masked form.
  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  // Merge-masked form.
  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  // Zero-masked form.
  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}
4251
// Assembler aliases: the ".s" mnemonic suffix selects the reversed-operand
// (_REV) encodings defined above.  The trailing 0 keeps these aliases out of
// the printer (emit-priority 0: parse-only).
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
4272
// When optimizing for size, lower "move low element and zero upper bits"
// (X86vzmovl) with VMOVSS against a zeroed register.  Wider (256/512-bit)
// inputs extract the low xmm, do the VMOVSS, and SUBREG_TO_REG the result
// back to full width.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}
4298
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  // Blend immediate 1 keeps only the low f32 lane from $src.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  // VPBLENDW works on words, so immediate 3 covers the low 32-bit lane.
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}
4313
// Scalar loads into the low vector element, and zero-extending vector loads
// (X86vzload*) widened to 256/512 bits via SUBREG_TO_REG.
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
4334
// vmovq xmm, xmm: copies the low 64-bit element and zeroes the upper element
// (matches X86vzmovl on v2i64).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}
4343
// GPR-to-vector moves, zero-extending loads, and X86vzmovl lowerings for
// integer/f64 types.  Wider results reuse the 128-bit instructions and widen
// with SUBREG_TO_REG (the hardware zeroes the upper bits).
let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Move low 64 bits and clear the rest: extract low xmm, vmovq, re-widen.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}
4393
4394//===----------------------------------------------------------------------===//
4395// AVX-512 - Non-temporals
4396//===----------------------------------------------------------------------===//
4397
// 512-bit non-temporal aligned load.  No pattern here; selection is done by
// the Pat defs below.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4402
// 256/128-bit non-temporal aligned loads (EVEX forms require VLX).
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4416
// One non-temporal store (mr) instruction for the given vector type.  High
// AddedComplexity makes the NT pattern win over the ordinary store patterns.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4426
// Instantiate the NT store at 512 bits (AVX512F) and 256/128 bits (VLX).
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}
4438
// Non-temporal stores: integer (vmovntdq), f64 (vmovntpd), f32 (vmovntps).
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
4445
// Route the remaining 512-bit element types through VMOVNTDQ for stores and
// VMOVNTDQA for loads (the instructions are type-agnostic at the bit level).
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4467
// Same routing for the 256- and 128-bit widths (VLX forms).
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4509
4510//===----------------------------------------------------------------------===//
4511// AVX-512 - Integer arithmetic
4512//
// Register-register (rr) and register-memory (rm) forms of a maskable integer
// binop.  AVX512_maskable expands each into unmasked, merge-masked and
// zero-masked variants.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4530
// Adds the broadcast-from-memory (rmb, EVEX.b) form on top of rr/rm.
// Only element widths with a broadcast encoding (d/q) use this multiclass.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4545
// Instantiate rr/rm forms at 512 bits under 'prd', and at 256/128 bits when
// VLX is additionally available.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4561
// Like avx512_binop_rm_vl, but with the broadcast (rmb) form included.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4577
// i64-element binop: broadcast form available, REX.W set, 64-bit CD8 scale.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
4585
// i32-element binop: broadcast form available, 32-bit CD8 scale.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4592
// i16-element binop: no broadcast form (rr/rm only), VEX.W ignored.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
4600
// i8-element binop: no broadcast form (rr/rm only), VEX.W ignored.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
4608
// Instantiates both the dword ("d" suffix) and qword ("q" suffix) forms of a
// binop from separate d/q opcodes.  Sub-multiclass names D/Q become part of
// the final instruction names (e.g. VPANDD/VPANDQ).
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                   IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                   IsCommutable>;
}
4618
// Instantiates both the byte ("b" suffix) and word ("w" suffix) forms of a
// binop from separate b/w opcodes.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                   IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                   IsCommutable>;
}
4628
// All four element sizes of a binop.  Note the predicate split: the d/q forms
// only need HasAVX512, while the b/w forms require HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
4639
// Binop whose source and destination vector types differ (e.g. i64 multiply
// producing i64 from different-width inputs, vpmultishiftqb).  The broadcast
// form uses a separate _Brdct VTInfo because the memory operand broadcasts at
// _Brdct's element size rather than _Src's.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  // Register-register form; only this form is marked commutable.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  // Register-memory form (full-width load of src2).
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Register-broadcast form (EVEX_B): src2 is a scalar broadcast at _Brdct's
  // element size, bitconverted to the source vector type.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                          (_Brdct.ScalarLdFrag addr:$src2))))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4672
// Integer add/subtract for all element sizes, plus signed/unsigned
// saturating add/subtract (byte/word only, per the ISA).
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
// Integer multiplies.  VPMULLD and VPMULLQ share opcode 0x40 in different
// maps; VPMULLQ (AVX512DQ) has no VEX equivalent, hence
// NotEVEX2VEXConvertible.
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
// Byte/word averages and widening 32x32->64 multiplies.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
4704
// Expands avx512_binop_rm2 over all three vector widths.  The broadcast
// VTInfo is hard-coded to the i64 infos (v8i64/v4i64x/v2i64x), matching the
// qword-granular broadcast of the instructions instantiated from this
// (e.g. vpmultishiftqb below).
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
4726
// VPMULTISHIFTQB (AVX512VBMI): byte-element source/dest but qword-granular
// broadcast, which is why it goes through avx512_binop_all.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;
4730
// Broadcast-memory (EVEX_B) form of a pack operation; only defined where the
// source element size supports embedded broadcast (i32 for vpackssdw etc.).
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Src.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (X86VBroadcast
                                          (_Src.ScalarLdFrag addr:$src2))))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4745
// Register-register and register-memory forms of a pack-style operation
// (narrowing binop with distinct source/dest VTs).  The rr form forwards
// IsCommutable to both the commutable and fast-commutable flags of
// AVX512_maskable.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4766
// i32 -> i16 packs (vpackssdw/vpackusdw) for all widths.  Dword source
// elements support embedded broadcast, so the rmb form is included.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 packs (vpacksswb/vpackuswb).  No rmb form: word elements do not
// support EVEX embedded broadcast.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
4801
// Multiply-add style ops (vpmaddubsw, vpmaddwd): widening binops with
// distinct source/dest VTInfos, reusing avx512_packs_rm for the rr/rm forms.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
4818
// Pack instructions.  Note VPACKUSDW uses AVX5128IBase (0F 38 opcode map,
// it is the SSE4.1-era instruction) while the others are 0F-map (AVX512BIBase).
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

// Multiply-add: vpmaddubsw (u8 x s8 -> s16) is not commutable; vpmaddwd
// (s16 x s16 -> s32) is.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4828
// Signed/unsigned min/max for all element sizes.  b/w forms require HasBWI,
// d/q forms HasAVX512.  The qword forms are AVX-512-only instructions with no
// VEX equivalent, hence NotEVEX2VEXConvertible.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
4868
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow operands are widened into a ZMM register via INSERT_SUBREG on an
// IMPLICIT_DEF, the 512-bit multiply is executed, and the low subregister of
// the result is extracted.  The upper elements are garbage but unused.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4885
// Note: a byte-for-byte duplicate of the PMULLQ NoVLX lowering patterns
// immediately above previously appeared here (copy-paste redundancy).  The
// duplicate anonymous Pat<> records added nothing and have been removed.
4902
// Lower 128/256-bit qword min/max to the 512-bit instruction when VLX is not
// available: widen operands into ZMM, run the ZMM instruction, extract the
// low subregister (upper result elements are garbage but unused).
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4918
// Instantiate the widening lowering for all four qword min/max ops when the
// target has AVX-512 but not VLX.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
4925
4926//===----------------------------------------------------------------------===//
4927// AVX-512  Logical Instructions
4928//===----------------------------------------------------------------------===//
4929
// Bitwise logic ops only come in dword/qword-element flavors; byte/word
// vector types are handled by the bitcast patterns further below.  Each op
// uses a single opcode for both element sizes.  VPANDN is not commutable.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
4938
// Map byte/word-element logic ops onto the qword instructions: bitwise logic
// is element-size agnostic, so v16i8/v8i16 (and the 256-bit equivalents) can
// reuse the Q forms.  Broadcast loads are the exception — the broadcast
// granularity must match the memory element size, so f32 broadcasts select
// the D instructions and f64 broadcasts the Q instructions.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  // 128-bit full-width loads folded into the Q forms.
  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  // 128-bit broadcast loads: element size selects the D vs Q instruction.
  def : Pat<(and VR128X:$src1,
                 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1,
                (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1,
                 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1,
                      (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;

  def : Pat<(and VR128X:$src1,
                 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1,
                (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1,
                 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1,
                      (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;

  // 256-bit register forms.
  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  // 256-bit full-width loads.
  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;

  // 256-bit broadcast loads.
  def : Pat<(and VR256X:$src1,
                 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1,
                (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1,
                 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1,
                      (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;

  def : Pat<(and VR256X:$src1,
                 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1,
                (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1,
                 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1,
                      (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
}
5072
// 512-bit counterpart of the block above: byte/word logic types reuse the
// Q-element instructions; broadcast loads pick D or Q by element size.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  // Full-width loads folded into the Q forms.
  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;

  // Broadcast loads: element size selects D vs Q instruction.
  def : Pat<(and VR512:$src1,
                 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1,
                (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1,
                 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1,
                      (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZrmb VR512:$src1, addr:$src2)>;

  def : Pat<(and VR512:$src1,
                 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1,
                (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1,
                 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1,
                      (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZrmb VR512:$src1, addr:$src2)>;
}
5140
// Patterns to catch vselect with different type than logic op.
// SelectInfo (_) supplies the mask/select type; IntInfo supplies the type the
// logic op was performed in.  The bitconvert bridges the two so the masked
// (rrk/rrkz/rmk/rmkz) instruction forms can be selected.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                    X86VectorVTInfo _,
                                    X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  // Zero-masking variant.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}
5172
// Broadcast-load counterpart of avx512_logical_lowering: selects the rmb /
// rmbk / rmbkz instruction forms when the logic op's second operand is a
// scalar broadcast bitconverted from the select type.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
                         (bitconvert (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  // Merge-masking variant.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (bitconvert (_.VT
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  // Zero-masking variant.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (bitconvert (_.VT
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
5200
// Instantiate avx512_logical_lowering for all three vector widths:
// 128/256-bit variants require VLX; the 512-bit variant only needs AVX512.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}
5215
// Instantiate avx512_logical_lowering_bcast for all three vector widths,
// mirroring avx512_logical_lowering_sizes (VLX gates the sub-512-bit forms).
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}
5230
// Instantiate the masked-logical lowering patterns for every combination of
// vselect element type (which picks the D/Q instruction flavor) and the
// element type the logic op is actually performed in. Broadcast patterns are
// only emitted for the natively-matching f32/i32 and f64/i64 pairs.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  // Broadcast forms: f32 select with i32 op, f64 select with i64 op.
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}
5275
// Emit the full set of masked-logical lowering patterns for each of the four
// AVX-512 integer logic instructions.
defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR",  or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5280
5281//===----------------------------------------------------------------------===//
5282// AVX-512  FP arithmetic
5283//===----------------------------------------------------------------------===//
5284
// Scalar FP binary op: maskable intrinsic forms (rr_Int/rm_Int, using
// VecNode on full 128-bit vectors) plus isCodeGenOnly FR32/FR64 forms
// (rr/rm, using OpNode on scalar FP registers) for plain scalar selection.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Plain scalar-register forms; codegen-only since the assembler uses the
  // _Int variants.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
5318
// Scalar FP binary op with an explicit static rounding-mode operand
// ($rc, encoded via EVEX.b + EVEX_RC): the rrb_Int form.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP binary op with a {sae} (suppress-all-exceptions) form: same
// rr_Int/rm_Int/rr/rm variants as avx512_fp_scalar, plus an EVEX.b-encoded
// rrb_Int variant that selects SaeNode.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Plain scalar-register forms; codegen-only since the assembler uses the
  // _Int variants.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // {sae} form (EVEX.b with no memory operand suppresses exceptions).
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}
5370
// Combine the plain scalar forms and the rounding-mode forms for both SS
// (f32, XS prefix) and SD (f64, XD + VEX.W) variants of one opcode.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
5385
// Same as avx512_binop_s_round but for ops whose EVEX.b form is {sae}
// rather than a rounding mode (used by min/max below).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic: add/mul are commutable; sub/div/min/max are not.
// min/max use the {sae} variant instead of static rounding.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;
5408
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Codegen-only scalar forms with isCommutable forced to 1.
// NOTE(review): "comutable" in the multiclass name is a historical typo
// ("commutable"); kept because call sites reference it by name.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Commutable scalar min/max variants (selected for X86fminc/X86fmaxc).
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;
5445
// Packed FP binary op for one vector width: rr, rm (full-vector load) and
// rmb (broadcast load) maskable forms. IsKCommutable controls commutability
// of the masked variants independently of the unmasked one.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast form: second operand is a scalar splat from memory.
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}
5474
// Packed FP binary op with a static rounding-mode operand
// (EVEX.b + EVEX_RC): the rrb form.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
5485
// Packed FP binary op with a {sae} (suppress-all-exceptions) form,
// encoded via EVEX.b with register operands.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
5496
// Instantiate avx512_fp_packed over PS/PD at all three widths. prd gates
// the whole set (e.g. HasDQI for the FP logic ops); the 128/256-bit forms
// additionally require VLX. IsPD128Commutable lets PDZ128 differ in
// unmasked commutability from the rest.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

    // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
5527
// Rounding-mode (rrb) forms for the 512-bit PS/PD variants; static rounding
// is only available at 512 bits.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5537
// {sae} (rrb) forms for the 512-bit PS/PD variants; like static rounding,
// {sae} is only available at 512 bits.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5547
// Packed FP arithmetic. add/mul/sub/div get rounding-mode forms; min/max
// get {sae} forms. The commutable min/max (VMINC/VMAXC) variants are
// codegen-only. The FP logic ops (DQI) have no ISel patterns here
// (null_frag) — they are selected via the lowering patterns above.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
5580
// Packed VSCALEF-style op: rr, rm and rmb (broadcast) maskable forms for
// one vector width.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: second operand is a scalar splat from memory.
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5603
// Scalar VSCALEF-style op: maskable rr and rm (intrinsic memory operand)
// forms.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5619
// All VSCALEF forms: packed PS/PD at every width (rounding forms at 512-bit
// only), plus scalar SS/SD with their rounding forms. opc is the packed
// opcode, opcScaler the scalar one.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
                              EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEFPS/PD (0x2C) and VSCALEFSS/SD (0x2D).
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5651
5652//===----------------------------------------------------------------------===//
5653// AVX-512  VPTESTM instructions
5654//===----------------------------------------------------------------------===//
5655
// VPTESTM/VPTESTNM register and full-vector memory forms, writing a mask
// register. Intentionally pattern-less (null_frag).
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5676
// Broadcast-memory form of VPTESTM/VPTESTNM (rmb); pattern-less like
// avx512_vptest.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5688
// D/Q-element VPTEST forms at all widths, including broadcast variants
// (broadcast exists only for 32/64-bit elements).
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}
5703
// Combine the dword (D) and qword (Q, VEX.W) element-size variants.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                 avx512vl_i64_info>, VEX_W;
}
5711
// Word/byte element VPTEST forms; require BWI (plus VLX for sub-512-bit).
// No broadcast variants exist for 8/16-bit elements.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                            v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }
}
5732
// All element sizes: word/byte (opc_wb) plus dword/qword (opc_dq).
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5737
// VPTESTM (test-and-set-mask) and VPTESTNM (test-not-and-set-mask) share
// opcodes 0x26 (w/b) and 0x27 (d/q); the prefix (T8PD vs. T8XS) selects
// between the two operations.
defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                         SchedWriteVecLogic>, T8PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                         SchedWriteVecLogic>, T8XS;
5742
5743//===----------------------------------------------------------------------===//
5744// AVX-512  Shift instructions
5745//===----------------------------------------------------------------------===//
5746
// Shift-by-immediate: register-immediate ("ri") and memory-immediate ("mi")
// forms. The immediate is an 8-bit unsigned shift count; masking variants are
// produced by AVX512_maskable.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 imm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}
5764
// Shift-by-immediate with an embedded-broadcast memory operand ("mbi"): the
// scalar at $src1 is broadcast to the full vector before shifting. EVEX_B
// selects the broadcast form of the encoding.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
     EVEX_B, Sched<[sched.Folded]>;
}
5775
// Shift by a count held in an XMM register (or loaded from a 128-bit memory
// location): all lanes are shifted by the same count taken from $src2.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
   // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5794
// Instantiate the register-count shift at 512/256/128-bit vector widths.
// The 512-bit form only needs `prd`; the narrower forms also need VLX.
// Note the CD8 tuple differs per width (VQ/VH/VF) because the memory operand
// is always 128 bits regardless of vector width.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5812
// Instantiate d/q/w element-size variants of a register-count shift. The
// word variant requires BWI. NotEVEX2VEXConvertibleQ blocks EVEX->VEX
// compression for the qword form (used for VPSRAQ, which has no VEX
// equivalent).
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}
5825
// Instantiate the immediate-shift forms (ri/mi plus broadcast mbi) at
// 512/256/128-bit widths. 512-bit needs AVX512F; narrower widths need VLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}
5846
// Word-element immediate-shift forms (no broadcast variant: 16-bit elements
// have no embedded-broadcast encoding). Requires BWI; narrower widths also
// need VLX. VEX_WIG: the W bit is ignored for these encodings.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}
5860
// Dword and qword immediate-shift forms. NotEVEX2VEXConvertibleQ blocks
// EVEX->VEX compression for the qword variant (e.g. VPSRAQ by immediate has
// no VEX counterpart).
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
5872
// Shift/rotate-by-immediate instruction definitions. The shifts use shared
// opcodes with the operation selected by the ModRM reg field (MRM2/4/6 for
// srl/sra/sll, MRM0/1 for ror/rol). VPSRA's qword form is marked
// not-EVEX2VEX-convertible (no VEX VPSRAQ exists).
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5892
// Shift-by-XMM-count instruction definitions (count in the low element of an
// XMM register / 128-bit memory operand). VPSRA passes 1 to mark its qword
// form not-EVEX2VEX-convertible.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
5899
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern widens the source into a ZMM register via INSERT_SUBREG on an
// IMPLICIT_DEF, performs the 512-bit arithmetic shift, and extracts the
// original-width subregister. The upper (undefined) lanes are discarded.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
}
5926
5927//===-------------------------------------------------------------------===//
5928// Variable Bit Shifts
5929//===-------------------------------------------------------------------===//
5930
// Variable (per-element) shift: each lane of $src1 is shifted by the
// corresponding lane of $src2. Register-register ("rr") and register-memory
// ("rm") forms; masking via AVX512_maskable.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5948
// Variable-shift form with an embedded-broadcast memory operand ("rmb"): the
// scalar shift-count vector element at $src2 is broadcast to all lanes.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2)))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5961
// Instantiate the variable shift (including broadcast form) at 512/256/128-bit
// widths; 512-bit needs AVX512F, narrower widths additionally need VLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}
5975
// Instantiate the dword and qword variants of a variable shift; the qword
// variant gets VEX_W for the 64-bit element encoding.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}
5983
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Both operands are widened into ZMM via INSERT_SUBREG, the 512-bit "Zrr"
// instruction (looked up by name with !cast) is applied, and the
// original-width subregister is extracted.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element variable shift (no broadcast form: 16-bit elements have no
// embedded-broadcast encoding). Requires BWI; narrower widths also need VLX.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}
6018
// Variable shift and variable rotate instruction definitions, plus NoVLX
// lowerings that reuse the 512-bit forms for instructions whose 128/256-bit
// encodings require VLX (VPSRAVQ) or VLX+BWI (the word-element forms).
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6035
6036
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern widens the operand(s) into ZMM, performs the 512-bit
// variable (VPROLV*Zrr) or immediate (VPROL*Zri) rotate-left, and extracts
// the original-width subregister; the undefined upper lanes are discarded.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-left, qword elements.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable rotate-left, dword elements.
  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate rotate-left, qword elements.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  // Immediate rotate-left, dword elements.
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
6087
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the VPROL patterns above, for rotate-right: widen into ZMM,
// rotate with the 512-bit variable (VPRORV*Zrr) or immediate (VPROR*Zri)
// form, then extract the original-width subregister.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-right, qword elements.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable rotate-right, dword elements.
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate rotate-right, qword elements.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  // Immediate rotate-right, dword elements.
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
6138
6139//===-------------------------------------------------------------------===//
6140// 1-src variable permutation VPERMW/D/Q
6141//===-------------------------------------------------------------------===//
6142
6143multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6144                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6145  let Predicates  = [HasAVX512] in
6146  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6147           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6148
6149  let Predicates = [HasAVX512, HasVLX] in
6150  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6151              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6152}
6153
// Immediate-controlled permutation (VPERMQ/VPERMPD imm form) at 512/256-bit
// widths. Reuses the shift-by-immediate multiclasses for the operand
// patterns; no 128-bit form exists.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
  }
6168
// Byte/word variable permutation (VPERMB/VPERMW) at all three widths; the
// required feature (BWI or VBMI) is passed in as `prd`. No broadcast form:
// byte/word elements have no embedded-broadcast encoding.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}
6182
// Variable permutes: VPERMW/VPERMB (control vector), VPERMD/Q/PS/PD (control
// vector, 512/256-bit only), and the immediate-controlled VPERMQ/VPERMPD.
// VPERMB/VPERMW share opcode 0x8D; VEX_W distinguishes them.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

// Immediate-controlled forms (VPERMQ/VPERMPD $imm).
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6203
6204//===----------------------------------------------------------------------===//
6205// AVX-512 - VPERMIL
6206//===----------------------------------------------------------------------===//
6207
// VPERMILPS/PD with a vector control operand: register ("rr"), memory ("rm"),
// and embedded-broadcast ("rmb") forms. The data (_) and control (Ctrl)
// operands may have different types (f32/f64 data, i32/i64 control).
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
6236
// Instantiate the vector-controlled VPERMILP at 512/256/128-bit widths;
// narrower widths require VLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6252
// Produce both VPERMILP forms under one instruction name: the
// vector-controlled form (opcode OpcVar) and the immediate-controlled form
// (opcode OpcImm, via the shift-by-immediate multiclasses).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
6261
// VPERMILPS/PD: float data permuted by an integer control vector or an
// immediate. The double variant uses VEX_W1X (W=1 for EVEX, W ignored when
// compressed to VEX).
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
6268
6269//===----------------------------------------------------------------------===//
6270// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6271//===----------------------------------------------------------------------===//
6272
// Immediate-controlled shuffles: VPSHUFD (dword), VPSHUFHW/VPSHUFLW (high/low
// word halves). All share opcode 0x70; the mandatory prefix (66/F3/F2 via the
// i8Base classes) selects the variant.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
6282
6283//===----------------------------------------------------------------------===//
6284// AVX-512 - VPSHUFB
6285//===----------------------------------------------------------------------===//
6286
// VPSHUFB (byte shuffle by control vector) at all three widths. Requires BWI;
// narrower widths also need VLX. Reuses avx512_var_shift since the operand
// pattern (vector op vector, masked) is identical.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                              EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}
6300
// VPSHUFB instruction definition; VEX_WIG because the W bit is ignored.
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;
6303
6304//===----------------------------------------------------------------------===//
6305// Move Low to High and High to Low packed FP Instructions
6306//===----------------------------------------------------------------------===//
6307
// VMOVLHPS: move the low 64 bits of $src2 into the high 64 bits of the
// result (low half comes from $src1). VMOVHLPS is the converse and is
// commutable; it is marked NotMemoryFoldable since folding would change
// which half of memory is read.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6319
6320//===----------------------------------------------------------------------===//
6321// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6323//===----------------------------------------------------------------------===//
6324
// Memory form of the hi/lo packed moves: loads a scalar f64 from
// memory, bitcasts it to the destination vector type, and combines it
// with $src1 via OpNode. OpNode may be null_frag (see uses below), in
// which case the instruction is defined with no selection pattern.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                     (OpNode _.RC:$src1,
                       (_.VT (bitconvert
                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
6339
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
// The PS forms use null_frag (no pattern); the PD forms select via
// X86Unpckl / X86Movsd respectively.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6350
// Extra load patterns that map unpck/movsd-with-load DAGs onto the
// memory forms defined above.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
           (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
6363
// Store forms. The PS variants carry no selection pattern (assembler /
// codegen-only defs, hence mayStore + hasSideEffects = 0); the PD
// variants store element 0, with vmovhpd first moving the high element
// down via X86Unpckh.
let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhps\t{$src, $dst|$dst, $src}",
                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlps\t{$src, $dst|$dst, $src}",
                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (v2f64 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
6389
// Storing element 1 (selected by vpermilpd imm 1, then extracting
// element 0) is exactly what vmovhpd's store form does.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
6397//===----------------------------------------------------------------------===//
6398// FMA - Fused Multiply Operations
6399//
6400
// Packed FMA, 213 operand order: register, full-memory, and broadcast
// forms. $src1 is tied to $dst, and the selection pattern places the
// operands as (src2, src1, src3) to match the 213 semantics.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form: $src3 is a scalar memory operand splat via
  // X86VBroadcast (EVEX_B).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6426
// 213 form with an explicit static rounding-control operand (EVEX_B +
// EVEX_RC); register-only, 512-bit instantiation only (see _common).
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6437
// Instantiates the 213 forms at all widths: Z (512-bit, plus the
// rounding variant) under HasAVX512, Z256/Z128 under HasVLX.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6457
// Instantiates the 213 forms for both element types: PS (f32) and PD
// (f64, which adds VEX_W).
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6466
// All six packed FMA flavors in 213 operand order.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6473
6474
// Packed FMA, 231 operand order: pattern operands are
// (src2, src3, src1), i.e. the tied accumulator $src1 is the addend.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form (EVEX_B): $src3 is splat from a scalar memory
  // operand.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                      _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6501
// 231 form with static rounding control (EVEX_B + EVEX_RC),
// register-only.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6513
// Instantiates the 231 forms at all widths, mirroring
// avx512_fma3p_213_common.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6533
// PS (f32) and PD (f64 + VEX_W) instantiations of the 231 forms.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6542
// All six packed FMA flavors in 231 operand order.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6549
// Packed FMA, 132 operand order: the register pattern is
// (src1, src3, src2); the memory patterns deliberately use 312 order
// (see inline comments) to avoid tablegen's duplicate-pattern check.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                       _.RC:$src1, _.RC:$src2)), 1, 0>,
         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6579
// 132 form with static rounding control (EVEX_B + EVEX_RC),
// register-only.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6591
// Instantiates the 132 forms at all widths, mirroring the 213/231
// _common multiclasses above.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6611
// PS (f32) and PD (f64 + VEX_W) instantiations of the 132 forms.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6620
// All six packed FMA flavors in 132 operand order.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6627
// Scalar FMA
// Common body for one scalar FMA mnemonic. The *_Int forms operate on
// VR128X with null_frag patterns (intrinsic lowering is handled
// elsewhere); the isCodeGenOnly forms operate on the scalar FRC class
// and carry the RHS_* patterns supplied by the caller. MaskOnlyReg
// suppresses the register-form patterns (RHS_r / RHS_b).
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;

    def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                     !strconcat(OpcodeStr,
                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                     Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
6669
// Instantiates the 213/231/132 scalar forms for one element type,
// supplying the FRC selection patterns for each operand order.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                          _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                 _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}
6703
// Top-level scalar FMA driver: one SS (f32) and one SD (f64 + VEX_W)
// instantiation per mnemonic, both VEX_LIG (vector length ignored).
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
6715
// Scalar FMA instructions; each defm expands to the 213/231/132 forms
// for both SS and SD.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
6720
6721multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6722                                      string Suffix, SDNode Move,
6723                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
6724  let Predicates = [HasAVX512] in {
6725    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6726                (Op _.FRC:$src2,
6727                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6728                    _.FRC:$src3))))),
6729              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6730               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6731               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6732
6733    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6734                (Op _.FRC:$src2, _.FRC:$src3,
6735                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6736              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6737               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6738               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6739
6740    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6741                (Op _.FRC:$src2,
6742                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6743                    (_.ScalarLdFrag addr:$src3)))))),
6744              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6745               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6746               addr:$src3)>;
6747
6748    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6749                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6750                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6751              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6752               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6753               addr:$src3)>;
6754
6755    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6756                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6757                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6758              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6759               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6760               addr:$src3)>;
6761
6762    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6763               (X86selects VK1WM:$mask,
6764                (Op _.FRC:$src2,
6765                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6766                    _.FRC:$src3),
6767                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6768              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6769               VR128X:$src1, VK1WM:$mask,
6770               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6771               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6772
6773    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6774               (X86selects VK1WM:$mask,
6775                (Op _.FRC:$src2,
6776                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6777                    (_.ScalarLdFrag addr:$src3)),
6778                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6779              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6780               VR128X:$src1, VK1WM:$mask,
6781               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6782
6783    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6784               (X86selects VK1WM:$mask,
6785                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6786                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6787                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6788              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6789               VR128X:$src1, VK1WM:$mask,
6790               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6791
6792    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6793               (X86selects VK1WM:$mask,
6794                (Op _.FRC:$src2, _.FRC:$src3,
6795                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6796                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6797              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6798               VR128X:$src1, VK1WM:$mask,
6799               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6800               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6801
6802    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6803               (X86selects VK1WM:$mask,
6804                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6805                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6806                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6807              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6808               VR128X:$src1, VK1WM:$mask,
6809               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6810
6811    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6812               (X86selects VK1WM:$mask,
6813                (Op _.FRC:$src2,
6814                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6815                    _.FRC:$src3),
6816                (_.EltVT ZeroFP)))))),
6817              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6818               VR128X:$src1, VK1WM:$mask,
6819               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6820               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6821
6822    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6823               (X86selects VK1WM:$mask,
6824                (Op _.FRC:$src2, _.FRC:$src3,
6825                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6826                (_.EltVT ZeroFP)))))),
6827              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6828               VR128X:$src1, VK1WM:$mask,
6829               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6830               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6831
6832    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6833               (X86selects VK1WM:$mask,
6834                (Op _.FRC:$src2,
6835                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6836                    (_.ScalarLdFrag addr:$src3)),
6837                (_.EltVT ZeroFP)))))),
6838              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6839               VR128X:$src1, VK1WM:$mask,
6840               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6841
6842    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6843               (X86selects VK1WM:$mask,
6844                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6845                    _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6846                (_.EltVT ZeroFP)))))),
6847              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6848               VR128X:$src1, VK1WM:$mask,
6849               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6850
6851    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6852               (X86selects VK1WM:$mask,
6853                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6854                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6855                (_.EltVT ZeroFP)))))),
6856              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6857               VR128X:$src1, VK1WM:$mask,
6858               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6859
6860    // Patterns with rounding mode.
6861    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6862                (RndOp _.FRC:$src2,
6863                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864                       _.FRC:$src3, (i32 timm:$rc)))))),
6865              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6866               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6867               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6868
6869    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6870                (RndOp _.FRC:$src2, _.FRC:$src3,
6871                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6872                       (i32 timm:$rc)))))),
6873              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6874               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6875               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6876
6877    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6878               (X86selects VK1WM:$mask,
6879                (RndOp _.FRC:$src2,
6880                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6881                       _.FRC:$src3, (i32 timm:$rc)),
6882                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6883              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6884               VR128X:$src1, VK1WM:$mask,
6885               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6886               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6887
6888    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6889               (X86selects VK1WM:$mask,
6890                (RndOp _.FRC:$src2, _.FRC:$src3,
6891                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6892                       (i32 timm:$rc)),
6893                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6894              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6895               VR128X:$src1, VK1WM:$mask,
6896               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6897               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6898
6899    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6900               (X86selects VK1WM:$mask,
6901                (RndOp _.FRC:$src2,
6902                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6903                       _.FRC:$src3, (i32 timm:$rc)),
6904                (_.EltVT ZeroFP)))))),
6905              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6906               VR128X:$src1, VK1WM:$mask,
6907               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6908               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6909
6910    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6911               (X86selects VK1WM:$mask,
6912                (RndOp _.FRC:$src2, _.FRC:$src3,
6913                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6914                       (i32 timm:$rc)),
6915                (_.EltVT ZeroFP)))))),
6916              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6917               VR128X:$src1, VK1WM:$mask,
6918               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6919               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6920  }
6921}
6922
// Instantiate the scalar FMA pattern multiclass for each FMA flavor
// (FMADD/FMSUB/FNMADD/FNMSUB), once for f32 (SS, merged via movss) and
// once for f64 (SD, merged via movsd).
defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
6940
6941//===----------------------------------------------------------------------===//
6942// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6943//===----------------------------------------------------------------------===//
// VPMADD52 is a three-operand FMA-style instruction: the accumulator input
// ($src1) is tied to the destination register.
let Constraints = "$src1 = $dst" in {
// Register, memory and broadcast-memory forms of a single VPMADD52 opcode
// for one vector width (the X86VectorVTInfo parameter _).
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  // reg-reg form.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  // reg-mem form: $src3 is a full-width vector load.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // reg-broadcast form (EVEX_B): $src3 is a scalar load splat to all lanes.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"
6973
// Instantiate one VPMADD52 opcode at all three vector widths: the 512-bit
// form requires only IFMA, while the 128/256-bit forms additionally require
// VLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6987
// vpmadd52luq: multiply-add producing the low 52 bits of the product.
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
// vpmadd52huq: multiply-add producing the high 52 bits of the product.
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
6994
6995//===----------------------------------------------------------------------===//
6996// AVX-512  Scalar convert from sign integer to float/double
6997//===----------------------------------------------------------------------===//
6998
// Scalar int->fp conversion: codegen-only FRC forms plus the intrinsic
// (_Int) forms that operate on the full XMM register, with an AT&T alias
// that spells out the memory-size suffix for the rr_Int form.
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
                    string mem> {
  // Codegen-only forms on the scalar FP register class; selected via the
  // separate Pat definitions rather than from built-in patterns.
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  // Intrinsic forms: full vector register in/out, with patterns on OpNode.
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                (ins DstVT.RC:$src1, SrcRC:$src2),
                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set DstVT.RC:$dst,
                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                (ins DstVT.RC:$src1, x86memop:$src2),
                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set DstVT.RC:$dst,
                      (OpNode (DstVT.VT DstVT.RC:$src1),
                               (ld_frag addr:$src2)))]>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // AT&T-syntax alias with explicit size suffix for the register form.
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}
7032
// Scalar int->fp conversion with an explicit static rounding-mode operand
// (EVEX_RC), plus its AT&T-syntax alias.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 timm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}
7050
// Convenience wrapper combining the plain and rounding-mode variants of a
// scalar int->fp conversion under one name.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}
7060
let Predicates = [HasAVX512] in {
// Signed int (32/64-bit) -> float/double. VCVTSI2SDZ uses null_frag (and
// no rounding variant) since i32->f64 is always exact.
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Suffix-less AT&T spellings with a memory operand default to the 32-bit
// forms.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

// Select the codegen-only FRC forms for plain scalar sint_to_fp; the tied
// source is unused, hence IMPLICIT_DEF.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned int (32/64-bit) -> float/double; same structure as the signed
// conversions above.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd", "l">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
7140
7141//===----------------------------------------------------------------------===//
7142// AVX-512  Scalar convert from float/double to integer
7143//===----------------------------------------------------------------------===//
7144
// Scalar fp->int conversion (non-truncating): register, rounding-control
// (EVEX_B + EVEX_RC) and memory forms, each with an AT&T-suffix alias.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched]>;
    // Static rounding-mode form.
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Predicates = [HasAVX512]

  // AT&T aliases that append the explicit destination-size suffix.
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
}
7175
7176// Convert float/double to signed/unsigned int 32/64
// ss -> signed i32/i64.
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
// ss -> unsigned i32/i64.
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
// sd -> signed i32/i64.
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// sd -> unsigned i32/i64.
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7201
7202// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7203// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
// Signed int -> f32, merged into an existing vector via movss: select the
// _Int forms directly so no separate vmovss is emitted.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

// Signed int -> f64, merged via movsd.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned int -> f32, merged via movss.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned int -> f64, merged via movsd.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
7285
7286// Convert float/double to signed/unsigned int 32/64 with truncation
// Truncating scalar fp->int conversion: codegen-only FRC forms, intrinsic
// forms (register, SAE and memory) and AT&T-suffix aliases.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512] in {
  // Codegen-only forms on the scalar FP register class, selected for the
  // generic fp_to_sint/fp_to_uint nodes.
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Intrinsic forms on the full vector register.
  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
           EVEX, VEX_LIG, Sched<[sched]>;
  // Suppress-all-exceptions (SAE) form.
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
} //HasAVX512

  // AT&T aliases with explicit destination-size suffix.
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
7327
// Truncating ss/sd -> signed i32/i64.
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating ss/sd -> unsigned i32/i64.
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7353
7354//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
7356//===----------------------------------------------------------------------===//
7357
// Scalar fp->fp conversion (base forms): maskable intrinsic register and
// memory variants, plus codegen-only FRC forms with no patterns.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Codegen-only FRC forms; matched through the explicit Pat definitions
  // that follow the instantiations.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
7387
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
7399
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  // EVEX_B + EVEX_RC encode the static rounding mode in $rc.
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// f64 -> f32 (narrowing) scalar conversion: base forms plus the
// rounding-control variant (narrowing can round, so RC applies).
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}
7422
// Scalar single-to-double conversion (e.g. VCVTSS2SD): instantiates the
// plain form plus the SAE form (an fpextend is exact, so it takes {sae}
// rather than a rounding mode). XS/CD8<32> reflect the f32 source element.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeSAE,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Instantiate the scalar f64<->f32 conversions. Both share opcode 0x5A;
// they are distinguished by the XD/XS prefixes applied in the multiclasses.
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;
7439
// Select generic scalar fpextend/fpround onto the AVX-512 conversion
// instructions. The unused first source operand is tied off with
// IMPLICIT_DEF.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
// Memory-form fold only under OptForSize: folding would otherwise create a
// false dependency on the destination register.
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

// Movss/Movsd-merged forms select the _Int variants, which preserve the
// upper destination elements.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
7464
7465//===----------------------------------------------------------------------===//
7466// AVX-512  Vector convert from signed/unsigned integer to float/double
7467//          and from float/double to signed/unsigned integer
7468//===----------------------------------------------------------------------===//
7469
// Base multiclass for vector int<->fp conversions. Emits three forms:
//   rr  - register source
//   rm  - full-vector memory source (LdDAG overridable for partial loads)
//   rmb - broadcast scalar memory source (EVEX_B)
// Alias is an "x"/"y" mnemonic suffix used when dest size alone cannot
// disambiguate the memory form; MaskRC is overridable for narrow results.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {

  // Register-register form.
  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                          OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect MaskRC:$mask,
                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched]>;

  // Memory form; the load pattern comes from LdDAG so callers can supply
  // partial (e.g. 64-bit) loads.
  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, Sched<[sched.Folded]>;

  // Broadcast-from-memory form ({1toN} syntax).
  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            )),
                         (vselect MaskRC:$mask,
                                  (_.VT
                                   (OpNode
                                    (_Src.VT
                                     (X86VBroadcast
                                      (_Src.ScalarLdFrag addr:$src))))),
                                  _.RC:$src0),
                         vselect, "$src0 = $dst">,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions
// Register form only; EVEX_B with a register source encodes {sae}.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}
7529
// Conversion with rounding control (RC)
// Register form only; EVEX_B + EVEX_RC encode the static rounding mode
// supplied by the AVX512RC:$rc operand.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
                         X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7540
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// The extload PatFrag is looked up by the source vector type name so the
// rm form matches anyext/extending loads directly.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
                   MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7551
// Extend Float to Double
// ZMM gets the SAE variant in addition to the plain form; the 128-bit
// variant reads only 2 of the 4 source floats, hence the explicit
// "{1to2}" broadcast string and f64mem (64-bit) memory operand.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               sched.YMM>, EVEX_V256;
  }
}
7568
// Truncate Double to Float
// The 128-bit form uses null_frag: its selection patterns are written
// separately (with X86vfpround/X86vmfpround) to support masking.
// The "x"/"y" mnemonic suffixes and the AT&T InstAliases below let the
// assembler distinguish the 128/256-bit memory forms, which share the
// same destination register class.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // AT&T-syntax aliases for the 128-bit ("x") forms.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // AT&T-syntax aliases for the 256-bit ("y") forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7624
// Instantiate the packed f64<->f32 conversions. Both use opcode 0x5A and
// are distinguished by the PD/PS prefixes and VEX_W.
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;
7629
// 512-bit VCVTPD2PS selection patterns: plain, merge-masked, and
// zero-masked variants for register, load, and broadcast-load sources.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
            (VCVTPD2PSZrr VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
                     VR256X:$src0),
            (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;

  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
                     VR256X:$src0),
            (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTPD2PSZrmb addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
                     (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
                     (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
}
7660
// 256-bit VCVTPD2PS patterns use generic fpround; 128-bit patterns use the
// X86vfpround/X86vmfpround nodes because the instruction definitions were
// given null_frag (the v2f64->v4f32 conversion zeroes the upper elements,
// which plain fpround/vselect cannot express).
let Predicates = [HasVLX] in {
  def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
            (VCVTPD2PSZ256rr VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
            (VCVTPD2PSZ256rm addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
            (VCVTPD2PSZ256rmb addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
                     VR128X:$src0),
            (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
                     (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
                     v4f32x_info.ImmAllZerosV),
            (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(X86vfpround (v2f64 VR128X:$src)),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(X86vfpround (loadv2f64 addr:$src)),
            (VCVTPD2PSZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
            (VCVTPD2PSZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          (v4f32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
7720
// Convert Signed/Unsigned Doubleword to Double
// The 128-bit form converts only the low 2 of 4 source dwords, so it
// supplies a custom 64-bit load DAG (scalar_to_vector of loadi64) and the
// explicit "{1to2}" broadcast string.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7740
// Convert Signed/Unsigned Doubleword to Float
// Same-width conversion: ZMM additionally gets a rounding-control form.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7757
// Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversions take {sae} rather than a rounding mode, so the
// ZMM form pairs with avx512_vcvt_fp_sae.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7774
// Convert Float to Signed/Unsigned Doubleword
// Non-truncating variant: the ZMM form pairs with avx512_vcvt_fp_rc for
// static rounding control.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7791
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // AT&T-syntax aliases for the 128-bit ("x") forms.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // AT&T-syntax aliases for the 256-bit ("y") forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7855
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // AT&T-syntax aliases for the 128-bit ("x") forms.
  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  // AT&T-syntax aliases for the 256-bit ("y") forms.
  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}
7917
// Convert Double to Signed/Unsigned Quadword
// Requires AVX512DQ; the ZMM form adds a rounding-control variant.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7934
// Convert Double to Signed/Unsigned Quadword with truncation
// Requires AVX512DQ; the ZMM form adds an {sae} variant.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7951
// Convert Signed/Unsigned Quadword to Double
// Requires AVX512DQ. The VLX forms are marked NotEVEX2VEXConvertible:
// they have no VEX-encoded equivalent to compress to.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
7968
// Convert Float to Signed/Unsigned Quadword
// Requires AVX512DQ. The 128-bit form reads only the low 2 of 4 source
// floats, hence the custom 64-bit load DAG and "{1to2}" broadcast string.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7991
7992// Convert Float to Signed/Unsigned Quadword with truncation
7993multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7994                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // 512-bit form plus a SAE (suppress-all-exceptions) variant; both require
  // AVX512DQ.
7995  let Predicates = [HasDQI] in {
7996    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7997             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7998                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7999  }
8000  let Predicates = [HasDQI, HasVLX] in {
8001    // Explicitly specified broadcast string, since we take only 2 elements
8002    // from v4f32x_info source
    // The 128-bit form reads only the low 64 bits (two f32 elements) of the
    // memory operand (f64mem), so a custom load pattern is supplied here.
8003    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8004                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
8005                               (v2i64 (OpNode (bc_v4f32
8006                                (v2f64
8007                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8008                               EVEX_V128;
8009    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8010                               sched.YMM>, EVEX_V256;
8011  }
8012}
8013
8014// Convert Signed/Unsigned Quadword to Float
8015multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8016                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // 512-bit form plus a rounding-control (RC) variant; both require AVX512DQ.
8017  let Predicates = [HasDQI] in {
8018    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8019                            sched.ZMM>,
8020             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8021                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8022  }
8023  let Predicates = [HasDQI, HasVLX] in {
8024    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8025    // memory forms of these instructions in the Asm Parser. They have the same
8026    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8027    // due to the same reason.
    // Z128 is defined with null_frag; its selection patterns (including the
    // masked forms) are provided separately further down in this file.
8028    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8029                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8030                               EVEX_V128, NotEVEX2VEXConvertible;
8031    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8032                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8033                               NotEVEX2VEXConvertible;
8034  }
8035
  // AT&T-syntax aliases that accept the explicit "x" suffix for the 128-bit
  // register, broadcast, masked and zero-masked forms.
8036  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8037                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8038                  VR128X:$src), 0, "att">;
8039  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8040                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8041                  VK2WM:$mask, VR128X:$src), 0, "att">;
8042  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8043                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8044                  VK2WM:$mask, VR128X:$src), 0, "att">;
8045  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8046                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8047                  i64mem:$src), 0, "att">;
8048  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8049                  "$dst {${mask}}, ${src}{1to2}}",
8050                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8051                  VK2WM:$mask, i64mem:$src), 0, "att">;
8052  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8053                  "$dst {${mask}} {z}, ${src}{1to2}}",
8054                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8055                  VK2WM:$mask, i64mem:$src), 0, "att">;
8056
  // Same set of AT&T-syntax aliases for the 256-bit ("y"-suffixed) forms.
8057  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8058                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8059                  VR256X:$src), 0, "att">;
8060  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8061                  "$dst {${mask}}, $src}",
8062                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8063                  VK4WM:$mask, VR256X:$src), 0, "att">;
8064  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8065                  "$dst {${mask}} {z}, $src}",
8066                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8067                  VK4WM:$mask, VR256X:$src), 0, "att">;
8068  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8069                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8070                  i64mem:$src), 0, "att">;
8071  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8072                  "$dst {${mask}}, ${src}{1to4}}",
8073                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8074                  VK4WM:$mask, i64mem:$src), 0, "att">;
8075  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8076                  "$dst {${mask}} {z}, ${src}{1to4}}",
8077                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8078                  VK4WM:$mask, i64mem:$src), 0, "att">;
8079}
8080
// Instantiations of the conversion multiclasses above: opcode, mnemonic,
// SDNode (plus rounding/SAE node), scheduling class, and encoding traits.
8081defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
8082                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8083
8084defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
8085                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8086                                PS, EVEX_CD8<32, CD8VF>;
8087
8088defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
8089                                X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8090                                XS, EVEX_CD8<32, CD8VF>;
8091
8092defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
8093                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8094                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8095
8096defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
8097                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8098                                 EVEX_CD8<32, CD8VF>;
8099
8100defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
8101                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8102                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8103
8104defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
8105                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
8106                                  EVEX_CD8<32, CD8VH>;
8107
8108defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
8109                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8110                                 EVEX_CD8<32, CD8VF>;
8111
8112defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8113                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8114                                 EVEX_CD8<32, CD8VF>;
8115
8116defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8117                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8118                                 VEX_W, EVEX_CD8<64, CD8VF>;
8119
8120defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8121                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8122                                 PS, EVEX_CD8<32, CD8VF>;
8123
8124defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8125                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8126                                 PS, EVEX_CD8<64, CD8VF>;
8127
8128defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8129                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8130                                 PD, EVEX_CD8<64, CD8VF>;
8131
8132defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8133                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8134                                 EVEX_CD8<32, CD8VH>;
8135
8136defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8137                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8138                                 PD, EVEX_CD8<64, CD8VF>;
8139
8140defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8141                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8142                                 EVEX_CD8<32, CD8VH>;
8143
8144defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8145                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8146                                 PD, EVEX_CD8<64, CD8VF>;
8147
8148defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8149                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8150                                 EVEX_CD8<32, CD8VH>;
8151
8152defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8153                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8154                                 PD, EVEX_CD8<64, CD8VF>;
8155
8156defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8157                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8158                                 EVEX_CD8<32, CD8VH>;
8159
8160defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8161                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8162                            EVEX_CD8<64, CD8VF>;
8163
8164defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8165                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8166                            EVEX_CD8<64, CD8VF>;
8167
8168defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
8169                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8170                            EVEX_CD8<64, CD8VF>;
8171
8172defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
8173                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8174                            EVEX_CD8<64, CD8VF>;
8175
// Manually-written selection patterns for the 128-bit PD->DQ/UDQ conversions
// (register, load and broadcast-load forms, plus masked/zero-masked variants).
8176let Predicates = [HasVLX] in {
8177  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8178  // patterns have been disabled with null_frag.
8179  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8180            (VCVTPD2DQZ128rr VR128X:$src)>;
8181  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8182                          VK2WM:$mask),
8183            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8184  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8185                          VK2WM:$mask),
8186            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8187
8188  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8189            (VCVTPD2DQZ128rm addr:$src)>;
8190  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8191                          VK2WM:$mask),
8192            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8193  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8194                          VK2WM:$mask),
8195            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8196
8197  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8198            (VCVTPD2DQZ128rmb addr:$src)>;
8199  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8200                          (v4i32 VR128X:$src0), VK2WM:$mask),
8201            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8202  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8203                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8204            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8205
8206  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8207  // patterns have been disabled with null_frag.
8208  def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
8209            (VCVTTPD2DQZ128rr VR128X:$src)>;
8210  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8211                          VK2WM:$mask),
8212            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8213  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8214                          VK2WM:$mask),
8215            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8216
8217  def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
8218            (VCVTTPD2DQZ128rm addr:$src)>;
8219  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8220                          VK2WM:$mask),
8221            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8222  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8223                          VK2WM:$mask),
8224            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8225
8226  def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8227            (VCVTTPD2DQZ128rmb addr:$src)>;
8228  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8229                          (v4i32 VR128X:$src0), VK2WM:$mask),
8230            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8231  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8232                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8233            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8234
8235  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8236  // patterns have been disabled with null_frag.
8237  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8238            (VCVTPD2UDQZ128rr VR128X:$src)>;
8239  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8240                           VK2WM:$mask),
8241            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8242  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8243                           VK2WM:$mask),
8244            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8245
8246  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8247            (VCVTPD2UDQZ128rm addr:$src)>;
8248  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8249                           VK2WM:$mask),
8250            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8251  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8252                           VK2WM:$mask),
8253            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8254
8255  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8256            (VCVTPD2UDQZ128rmb addr:$src)>;
8257  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8258                           (v4i32 VR128X:$src0), VK2WM:$mask),
8259            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8260  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8261                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8262            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8263
8264  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8265  // patterns have been disabled with null_frag.
8266  def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
8267            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8268  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8269                          VK2WM:$mask),
8270            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8271  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8272                          VK2WM:$mask),
8273            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8274
8275  def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
8276            (VCVTTPD2UDQZ128rm addr:$src)>;
8277  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8278                          VK2WM:$mask),
8279            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8280  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8281                          VK2WM:$mask),
8282            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8283
8284  def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8285            (VCVTTPD2UDQZ128rmb addr:$src)>;
8286  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8287                          (v4i32 VR128X:$src0), VK2WM:$mask),
8288            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8289  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8290                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8291            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8292}
8293
// Fold a 64-bit zero-extending vector load (the low two f32 elements) into
// the 128-bit PS->QQ/UQQ conversions, including masked/zero-masked variants.
8294let Predicates = [HasDQI, HasVLX] in {
8295  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8296            (VCVTPS2QQZ128rm addr:$src)>;
8297  def : Pat<(v2i64 (vselect VK2WM:$mask,
8298                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8299                            VR128X:$src0)),
8300            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8301  def : Pat<(v2i64 (vselect VK2WM:$mask,
8302                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8303                            v2i64x_info.ImmAllZerosV)),
8304            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8305
8306  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8307            (VCVTPS2UQQZ128rm addr:$src)>;
8308  def : Pat<(v2i64 (vselect VK2WM:$mask,
8309                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8310                            VR128X:$src0)),
8311            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8312  def : Pat<(v2i64 (vselect VK2WM:$mask,
8313                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8314                            v2i64x_info.ImmAllZerosV)),
8315            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8316
8317  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8318            (VCVTTPS2QQZ128rm addr:$src)>;
8319  def : Pat<(v2i64 (vselect VK2WM:$mask,
8320                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8321                            VR128X:$src0)),
8322            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8323  def : Pat<(v2i64 (vselect VK2WM:$mask,
8324                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8325                            v2i64x_info.ImmAllZerosV)),
8326            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8327
8328  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8329            (VCVTTPS2UQQZ128rm addr:$src)>;
8330  def : Pat<(v2i64 (vselect VK2WM:$mask,
8331                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8332                            VR128X:$src0)),
8333            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8334  def : Pat<(v2i64 (vselect VK2WM:$mask,
8335                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8336                            v2i64x_info.ImmAllZerosV)),
8337            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8338}
8339
// Without AVX512VL only the 512-bit instructions exist; widen 128/256-bit
// unsigned conversions by inserting into an undef 512-bit register,
// executing the Z-form instruction, and extracting the low subregister.
8340let Predicates = [HasAVX512, NoVLX] in {
8341def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
8342          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8343           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8344                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
8345
8346def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
8347          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8348           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8349                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8350
8351def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
8352          (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8353           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8354                                 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8355
8356def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8357          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8358           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8359                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
8360
8361def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8362          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8363           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8364                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8365
8366def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8367          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8368           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8369                                 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8370
8371def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8372          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8373           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8374                                 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8375}
8376
// Fold a 64-bit zero-extending vector load (the low two i32 elements) into
// the 128-bit signed/unsigned DQ->PD conversions, including masked variants.
8377let Predicates = [HasVLX] in {
8378  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8379            (VCVTDQ2PDZ128rm addr:$src)>;
8380  def : Pat<(v2f64 (vselect VK2WM:$mask,
8381                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8382                            VR128X:$src0)),
8383            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8384  def : Pat<(v2f64 (vselect VK2WM:$mask,
8385                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8386                            v2f64x_info.ImmAllZerosV)),
8387            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8388
8389  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8390            (VCVTUDQ2PDZ128rm addr:$src)>;
8391  def : Pat<(v2f64 (vselect VK2WM:$mask,
8392                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8393                            VR128X:$src0)),
8394            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8395  def : Pat<(v2f64 (vselect VK2WM:$mask,
8396                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8397                            v2f64x_info.ImmAllZerosV)),
8398            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8399}
8400
// Selection patterns for the 128-bit QQ/UQQ->PS conversions, whose
// instruction definitions use null_frag (see avx512_cvtqq2ps above).
8401let Predicates = [HasDQI, HasVLX] in {
8402  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8403  // patterns have been disabled with null_frag.
8404  def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
8405            (VCVTQQ2PSZ128rr VR128X:$src)>;
8406  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8407                           VK2WM:$mask),
8408            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8409  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8410                           VK2WM:$mask),
8411            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8412
8413  def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
8414            (VCVTQQ2PSZ128rm addr:$src)>;
8415  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8416                           VK2WM:$mask),
8417            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8418  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8419                           VK2WM:$mask),
8420            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8421
8422  def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8423            (VCVTQQ2PSZ128rmb addr:$src)>;
8424  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8425                           (v4f32 VR128X:$src0), VK2WM:$mask),
8426            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8427  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8428                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8429            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8430
8431  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8432  // patterns have been disabled with null_frag.
8433  def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
8434            (VCVTUQQ2PSZ128rr VR128X:$src)>;
8435  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8436                           VK2WM:$mask),
8437            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8438  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8439                           VK2WM:$mask),
8440            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8441
8442  def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
8443            (VCVTUQQ2PSZ128rm addr:$src)>;
8444  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8445                           VK2WM:$mask),
8446            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8447  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8448                           VK2WM:$mask),
8449            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8450
8451  def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
8452            (VCVTUQQ2PSZ128rmb addr:$src)>;
8453  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8454                           (v4f32 VR128X:$src0), VK2WM:$mask),
8455            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8456  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
8457                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8458            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8459}
8460
// With AVX512DQ but without AVX512VL, widen 128/256-bit QQ conversions to
// the 512-bit instruction via INSERT_SUBREG/EXTRACT_SUBREG, as above.
8461let Predicates = [HasDQI, NoVLX] in {
8462def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
8463          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8464           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8465                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8466
8467def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
8468          (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8469           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8470                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;
8471
8472def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
8473          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8474           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8475                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
8476
8477def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
8478          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8479           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8480                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8481
8482def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
8483          (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8484           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8485                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;
8486
8487def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
8488          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8489           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8490                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
8491
8492def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8493          (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8494           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8495                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;
8496
8497def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8498          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8499           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8500                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8501
8502def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8503          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8504           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8505                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
8506
8507def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8508          (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8509           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8510                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;
8511
8512def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8513          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8514           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8515                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
8516
8517def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8518          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8519           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8520                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
8521}
8522
8523//===----------------------------------------------------------------------===//
8524// Half precision conversion instructions
8525//===----------------------------------------------------------------------===//
8526
// Half->single conversion (vcvtph2ps): register and memory forms with
// AVX-512 masking. _dest/_src give the destination/source vector info,
// x86memop/ld_frag the memory operand and load fragment, sched the
// scheduling class.
8527multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8528                           X86MemOperand x86memop, PatFrag ld_frag,
8529                           X86FoldableSchedWrite sched> {
8530  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8531                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8532                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8533                            T8PD, Sched<[sched]>;
8534  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8535                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8536                            (X86cvtph2ps (_src.VT
8537                                          (ld_frag addr:$src)))>,
8538                            T8PD, Sched<[sched.Folded]>;
8539}
8540
// Register-only {sae} (suppress-all-exceptions) form of vcvtph2ps,
// selected via the X86cvtph2psSAE node and encoded with EVEX.B.
8541multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8542                               X86FoldableSchedWrite sched> {
8543  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8544                             (ins _src.RC:$src), "vcvtph2ps",
8545                             "{sae}, $src", "$src, {sae}",
8546                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8547                             T8PD, EVEX_B, Sched<[sched]>;
8548}
8549
// 512-bit vcvtph2ps (16 halves -> 16 floats), with the additional {sae}
// register form; the memory form reads 256 bits of halves.
8550let Predicates = [HasAVX512] in
8551  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8552                                    WriteCvtPH2PSZ>,
8553                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8554                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8555
// 256/128-bit vcvtph2ps under AVX512VL. The 128-bit form only reads the low
// 64 bits (4 halves) of memory, hence f64mem and the vzload patterns below.
8556let Predicates = [HasVLX] in {
8557  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8558                       load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8559                       EVEX_CD8<32, CD8VH>;
8560  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8561                       load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8562                       EVEX_CD8<32, CD8VH>;
8563
8564  // Pattern match vcvtph2ps of a scalar i64 load.
8565  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
8566            (VCVTPH2PSZ128rm addr:$src)>;
8567  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8568              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8569            (VCVTPH2PSZ128rm addr:$src)>;
8570}
8571
// VCVTPS2PH: single -> half conversion with an immediate rounding-control
// byte. Destination-form encoding (MRMDestReg/MRMDestMem), like a store.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain in {
  // Unmasked register form.
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
             Sched<[RR]>;
  // Merge-masked register form: $src0 supplies the pass-through elements.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  // Zero-masked register form.
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  // Store forms carry no ISel patterns here; they are selected via the
  // explicit Pat<>s elsewhere in the file and by memory folding.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
}
8608
// {sae} form of VCVTPS2PH: assembler/disassembler only (no pattern).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
8618
// Instantiations of VCVTPS2PH for 512/256/128-bit vectors, plus patterns
// that fold the conversion directly into a store.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  // The 128-bit conversion result occupies only the low 64 bits of the
  // v8i16, so a store of the extracted low f64/i64 element is the same as
  // the instruction's 64-bit memory store.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  // Full-width conversion + store for the 256- and 512-bit forms.
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}
8646
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  // The i16 input is widened to 32 bits (MOVSX32rr16) so it can be copied
  // into an XMM register; only the low half-float element is converted.
  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;

  // Round-trip f32 -> f16 -> f32 entirely in vector registers.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
8667
//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                            string OpcodeStr, X86FoldableSchedWrite sched> {
  // Asm/disasm only; no ISel pattern. {sae} is encoded via EVEX.B.
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
8676
// {sae} forms of the EVEX-encoded scalar compares (write EFLAGS).
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
8687
// EVEX-encoded scalar compares: FR-register forms pattern-matched from
// X86cmp, plus codegen-only XMM forms for the (u)comi intrinsics.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                  "ucomisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // vcomiss/vcomisd are defined without ISel patterns (Pattern = []).
  let Pattern = []<dag> in {
    defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                   "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                   "comisd", WriteFCom>, PD, EVEX,
                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  // Intrinsic forms operating on full XMM registers (X86ucomi/X86comi).
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                          sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                          sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                          sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                          sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
8719
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Scalar 14-bit-precision approximation instructions; register and
/// folded-memory forms, both maskable.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
8737
// Scalar reciprocal and reciprocal-sqrt approximations (14-bit precision).
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
8750
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Packed 14-bit-precision approximations: register, full-memory, and
/// broadcast-memory forms, all maskable.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: one scalar element splat across the vector (EVEX.B).
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (OpNode (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
8772
// Instantiates the packed 14-bit approximation forms for all vector widths:
// 512-bit always; 128/256-bit only with AVX512VL.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.XMM, v4f32x_info>,
                               EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
                                OpNode, sched.YMM, v8f32x_info>,
                               EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.XMM, v2f64x_info>,
                               EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
                                OpNode, sched.YMM, v4f64x_info>,
                               EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
8796
// Packed reciprocal-sqrt and reciprocal approximations (14-bit precision).
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8799
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Scalar 28-bit-precision (ER) approximations: plain register, {sae}
/// register, and folded-memory forms, all maskable.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           Sched<[sched]>;

  // {sae} form uses the separate OpNodeSAE DAG node and EVEX.B.
  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
8824
// Instantiates the scalar 28-bit forms for both f32 (ss) and f64 (sd).
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
8832
// Scalar ER instructions require the HasERI feature.
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}

// VGETEXP shares the scalar fp28 template but is defined outside the
// HasERI guard.
defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Packed 28-bit-precision forms: register, full-memory, and
/// broadcast-memory, all maskable.

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))))>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form: one scalar element splat across the vector (EVEX.B).
  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// {sae} register form for the packed 28-bit instructions (EVEX.B).
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (OpNode (_.VT _.RC:$src))>,
                        EVEX_B, Sched<[sched]>;
}
8875
// Packed ER instantiation: 512-bit ps/pd forms, each with plain and {sae}
// variants.
multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
8885
// 128/256-bit (VLX) packed forms reusing the fp28 template; no {sae}
// variants at these widths.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
8904
// Packed ER instructions require the HasERI feature.
let Predicates = [HasERI] in {
 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                            SchedWriteFRsqrt>, EVEX;
 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                            SchedWriteFRcp>, EVEX;
 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                            SchedWriteFAdd>, EVEX;
}
// Packed VGETEXP: 512-bit via avx512_eri plus 128/256-bit VLX forms,
// defined outside the HasERI guard.
defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                            SchedWriteFRnd>,
                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                          SchedWriteFRnd>, EVEX;
8917
// Packed sqrt with an explicit embedded rounding-control operand
// (EVEX.B + EVEX_RC); 512-bit only.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
8926
// Packed sqrt matched from the generic fsqrt node: register, full-memory,
// and broadcast-memory forms, all maskable.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (fsqrt (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: one scalar element splat across the vector (EVEX.B).
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (fsqrt (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
8947
// Instantiates packed sqrt for all widths: 512-bit always, 128/256-bit
// only with AVX512VL.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
8972
// Embedded-rounding packed sqrt variants exist only at 512 bits.
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
8982
// Scalar sqrt: intrinsic (_Int) forms including an embedded-rounding
// variant, codegen-only FR-register forms, and patterns selecting the
// FR forms from plain fsqrt.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    _.ScalarIntMemCPat:$src2)>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Embedded rounding-control form (EVEX.B + EVEX_RC).
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 timm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    // Pattern-free FR-register forms, selected by the Pat<>s below.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }

  // Plain scalar fsqrt: the $src1 operand is unused, so feed IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Fold the load only when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}
9031
// Instantiates scalar sqrt for f32 (ss) and f64 (sd).
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
9039
// EVEX-encoded VSQRT: packed (with rounding variants) and scalar forms.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9044
// Scalar VRNDSCALE: rounds with an immediate scale/control byte.
// Intrinsic (_Int) forms plus codegen-only FR forms selected from
// X86VRndScale.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 imm:$src3)))>,
                           Sched<[sched]>;

  // {sae} form (EVEX.B) uses the X86RndScalesSAE node.
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 imm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern-free FR-register forms, selected by the Pat<>s below.
  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>, Sched<[sched]>;

    let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }

  // Plain scalar round: the $src1 operand is unused, so feed IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, imm:$src2))>;
  }

  // Fold the load only when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, imm:$src2))>;
  }
}
9096
// Scalar round-scale instantiations for f32 and f64.
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;
9106
// Patterns mapping a masked scalar op (select of OpNode's result on the low
// element, moved into $src1) onto the masked _Int instruction forms.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masking: the false value is the existing low element of $dst.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    // Zero-masking: the false value is zero (ZeroFP).
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
9124
// Masked scalar sqrt patterns: the GR32 mask is truncated and copied into
// a VK1WM mask register.
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9131
9132
9133//-------------------------------------------------
9134// Integer truncate and extend operations
9135//-------------------------------------------------
9136
// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
// Plain (modular) truncation under a vector select.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect node:$mask,
                                    (trunc node:$src), node:$src0)>;
// Signed-saturating truncation under a vector select.
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect node:$mask,
                                     (X86vtruncs node:$src), node:$src0)>;
// Unsigned-saturating truncation under a vector select.
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect node:$mask,
                                      (X86vtruncus node:$src), node:$src0)>;
9149
// Common VPMOV* truncate machinery: register forms (unmasked rr,
// merge-masked rrk, zero-masked rrkz) plus pattern-less store forms
// (mr, mrk) whose truncating-store patterns are supplied separately by
// avx512_trunc_mr_lowering.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDPatternOperator MaskNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  // Unmasked register form: dst = OpNode(src).
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  // Merge-masked form: $src0 is tied to $dst so unselected elements pass
  // through unchanged.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  // Zero-masked form: unselected destination elements are zeroed.
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  // Store forms: no ISel patterns here (mayStore/hasSideEffects declared
  // explicitly because there is no pattern to infer them from).
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}
9191
// Select plain and masked truncating stores onto the pattern-less mr/mrk
// store forms defined in avx512_trunc_common.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  // Unmasked truncating store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                                    addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store.
  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
9206
// Instantiate a truncate instruction at all three vector widths. Separate
// op/mask nodes are taken per width because the narrow widths may need the
// 128-bit ("InVec") node variants (see the avx512_trunc_* wrappers below).
// 128/256-bit forms additionally require VLX; prd defaults to HasAVX512.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                             truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
9237
// q->b truncation (i64 elements to i8). Even the 512-bit source yields only
// 64 bits of result, so the 128-bit "InVec" node/mask-node is used at every
// width; the destination is v16i8 throughout.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
9249
// q->w truncation (i64 elements to i16). Only the 512-bit source produces a
// full 128-bit result and uses OpNode/MaskNode; the narrower widths use the
// "InVec" variants.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
9261
// q->d truncation (i64 elements to i32). The 256- and 512-bit sources
// produce full-width results (OpNode/MaskNode); only the 128-bit source
// needs the "InVec" variants.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
9273
// d->b truncation (i32 elements to i8). Only the 512-bit source fills a
// 128-bit result and uses OpNode/MaskNode; narrower widths use the "InVec"
// variants.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
9285
// d->w truncation (i32 elements to i16). The 256- and 512-bit sources use
// OpNode/MaskNode; only the 128-bit source needs the "InVec" variants.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
9297
// w->b truncation (i16 elements to i8). Requires BWI (passed as prd). The
// 256- and 512-bit sources use OpNode/MaskNode; only the 128-bit source
// needs the "InVec" variants.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
9309
// VPMOV* truncate instantiations. Each element-size combination comes in
// three flavors: plain truncation (trunc/X86vtrunc), signed saturation
// (X86vtruncs), and unsigned saturation (X86vtruncus), each paired with the
// matching select_* PatFrag and (masked) truncating-store frags.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi32,
                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi32,
                                  masked_truncstore_s_vi32, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc,
                                  X86vmtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;
9382
// Without VLX, 256-bit truncates are done by widening the source into a ZMM
// register (upper half undef via IMPLICIT_DEF) and using the 512-bit
// instruction, then extracting the low 128 bits of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
9393
// Same widening trick for v16i16->v16i8 when BWI is available but VLX is not.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
9399
// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Select the masked-truncate SD nodes onto the merge-masked (rrk) and
// zero-masked (rrkz) register forms of the named instruction.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  // Merge-masked: pass-through value comes from $src0.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  // Zero-masked: pass-through value is the all-zeros vector.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}
9417
// Masked-truncate lowerings for the combinations whose destination element
// type (i8/i16) would otherwise need a BWI vselect: 256-bit d->w under VLX,
// and the 512-bit d->w, d->b and q->w forms under plain AVX512.
let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
9437
// Common VPMOVSX/VPMOVZX extend forms: a maskable register variant (rr)
// matching OpNode, and a maskable load variant (rm) matching the extending
// load fragment directly.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}
9453
// b->w extend (VPMOVSXBW/VPMOVZXBW). Requires BWI. The 128-bit form only
// consumes the low half of the source register, so it matches the "InVec"
// node; the 256/512-bit forms match the full-width OpNode. ExtTy selects
// the sign/zero extending-load fragment by name.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9472
// b->d extend (VPMOVSXBD/VPMOVZXBD). The 128- and 256-bit forms consume only
// part of the source register and match the "InVec" node; the 512-bit form
// matches OpNode.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9491
// b->q extend (VPMOVSXBQ/VPMOVZXBQ). Even the 512-bit form consumes only the
// low 64 bits of the byte source, so every width matches the "InVec" node.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9510
// w->d extend (VPMOVSXWD/VPMOVZXWD). The 128-bit form uses the "InVec"
// node; the 256- and 512-bit forms consume a whole source register and
// match OpNode.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9529
// w->q extend (VPMOVSXWQ/VPMOVZXWQ). The 128- and 256-bit forms use the
// "InVec" node; only the 512-bit form consumes a full source register and
// matches OpNode.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9548
// d->q extend (VPMOVSXDQ/VPMOVZXDQ). The 128-bit form uses the "InVec"
// node; the 256- and 512-bit forms match OpNode. Unlike the byte/word
// variants, these forms do not carry VEX_WIG.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
9568
9569defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9570defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9571defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9572defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9573defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9574defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9575
9576defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9577defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9578defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9579defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9580defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9581defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9582
9583
// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
// Load-folding patterns for full-width extends (whole-vector loads) at the
// 256- and 512-bit widths; the 128-bit partial-load cases live in
// AVX512_pmovx_patterns below.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
9619
// Extends the base patterns with the partial-register-load cases: the
// instruction reads fewer source bytes than a full vector, so the matched
// DAG is a scalar load (or vzload) placed into a vector via
// scalar_to_vector, bitcast to the source element type, and fed to the
// in-vector extend node.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit partial-load patterns.
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}
9682
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
// Instead, zero-extend w->d first, then truncate d->b with VPMOVDB.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
9695
9696//===----------------------------------------------------------------------===//
9697// GATHER - SCATTER Operations
9698
// FIXME: Improve scheduling of gather/scatter instructions.

// Base multiclass for one masked-gather instruction form.  The destination
// vector and the writemask are both outputs: $src1 is tied to $dst (merge
// semantics for masked-off elements) and $mask is tied to $mask_wb because
// the hardware writes the mask back as elements complete.  @earlyclobber
// keeps the allocator from assigning $dst on top of the index register
// inside the vector memory operand.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
9714
// Gathers with 64-bit data elements (pd/q): instantiate the dword-indexed
// (D) and qword-indexed (Q) variants at 512/256/128-bit vector widths.
// VEX_W selects the 64-bit element size; the 128/256-bit forms require VLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
9732
// Gathers with 32-bit data elements (ps/d).  The qword-indexed (Q) variants
// use a result-type info one step narrower than the encoded vector length
// (e.g. _.info256 with EVEX_V512) because 64-bit indices halve the element
// count relative to the 32-bit data elements.  The 128-bit Q form overrides
// the writemask class to VK2WM to match its two qword indices.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9751
9752
// FP gathers (VGATHERDPD/QPD/DPS/QPS) and integer gathers
// (VPGATHERDQ/QQ/DD/QD); opcodes 0x92/0x93 and 0x90/0x91 respectively.
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9758
// Base multiclass for one masked-scatter instruction form.  Mirrors
// avx512_gather, but as a store: the vector memory operand is the
// destination and only the writemask is a register output ($mask is tied
// to $mask_wb since the hardware clears mask bits as elements are stored).
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
9774
// Scatters with 64-bit data elements (pd/q): dword-indexed (D) and
// qword-indexed (Q) variants at 512/256/128-bit widths.  Structure matches
// avx512_gather_q_pd; the 128/256-bit forms require VLX.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
9792
// Scatters with 32-bit data elements (ps/d).  As in avx512_gather_d_ps, the
// qword-indexed (Q) variants pair a narrower data-type info with a wider
// encoded vector length because 64-bit indices halve the element count; the
// 128-bit Q form overrides the writemask class to VK2WM.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mscatterv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9811
// FP scatters (VSCATTERDPD/QPD/DPS/QPS) and integer scatters
// (VPSCATTERDQ/QQ/DD/QD); opcodes 0xA2/0xA3 and 0xA0/0xA1 respectively.
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9817
// Gather/scatter prefetch (AVX-512 PF extension).  These take only a
// writemask and a vector memory operand and have no register results or
// patterns; mayLoad/mayStore are both set so either prefetch flavor is
// modeled conservatively.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}
9826
// Prefetch instantiations: {gather,scatter} x {hint 0,1} x {d,q index} x
// {ps,pd data}.  The /r field of the opcode (MRM1m/2m/5m/6m) selects
// gather-vs-scatter and the hint level; VEX_W and the CD8 element size
// select pd vs ps.  QPS forms use a 256-bit memory operand because qword
// indices halve the element count for 32-bit data.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9874
// One mask-to-vector conversion (vpmovm2*) at a single vector width:
// sign-extends each mask bit into a full element (all-ones / all-zeros).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?

// Also need a pattern for anyextend: any value works, so reuse the
// sign-extending instruction.
def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
          (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}
9885
// Instantiate cvt_by_vec_width at 512 bits under the base predicate, and at
// 256/128 bits when VLX is also available.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
9896
// vpmovm2{b,w} require BWI; vpmovm2{d,q} require DQI.  VEX_W selects the
// wider element size within each opcode pair.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9901
// One vector-to-mask conversion (vpmov*2m) at a single width.  Matched as
// "0 > x" (X86pcmpgtm with an all-zeros LHS), i.e. the mask bit is the sign
// bit of each element.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
9908
// Use 512bit version to implement 128/256 bit in case NoVLX: insert the
// narrow vector into the low part of an undef 512-bit register, run the
// 512-bit instruction, then copy the result into the narrow mask class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
9921
// Instantiate vpmov*2m at all widths: real 256/128-bit instructions when
// VLX is present, otherwise pattern-only lowerings through the 512-bit
// instruction (the *_Alt entries).
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
9939
// vpmov{b,w}2m require BWI; vpmov{d,q}2m require DQI.  VEX_W selects the
// wider element size within each opcode pair.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
9948
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Lowering: vpmovm2d to v16i32, then truncate down to bytes/words.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;

  // anyext can produce any value, so the sext expansion works for it too.
  def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
9963
// Same trick for v8i1 -> v8i16 using the 256-bit forms, which need VLX.
let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;

  def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
9971
9972//===----------------------------------------------------------------------===//
9973// AVX-512 - COMPRESS and EXPAND
9974//
9975
// One compress instruction at a single width: masked reg-reg form (no
// pattern here; selection is done via the lowering patterns below), plus
// unmasked and masked store forms.  The store forms carry no patterns and
// are marked mayStore/hasSideEffects = 0 explicitly.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}
9997
// Selection patterns for compress: masked compressing store, and the
// register forms with merge (rrk) and zeroing (rrkz) masking.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}
10010
// Instantiate compress instructions and patterns at 512 bits, plus 256/128
// bits when VLX is available.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
10026
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer (0x8B) and FP (0x8A) compress; VEX_W selects 64-bit elements.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
10036
// expand
// One expand instruction at a single width: masked reg-reg and reg-mem
// forms.  No patterns here; selection is done via the lowering patterns
// below.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10051
// Selection patterns for expand: masked expanding loads (undef and zero
// passthru both map to the zeroing form, register passthru to the merging
// form) and the register forms with merge (rrk) and zeroing (rrkz) masking.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}
10074
// Instantiate expand instructions and patterns at 512 bits, plus 256/128
// bits when VLX is available.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
10090
// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer (0x89) and FP (0x88) expand; VEX_W selects 64-bit elements.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
10100
// Unary packed-FP op with an immediate operand:
//   reg_vec1 = op(reg_vec, imm)
//   reg_vec1 = op(mem_vec, imm)
//   reg_vec1 = op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT (current rounding mode).
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  // EVEX_B here means embedded broadcast of a single scalar element.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10128
// Unary packed-FP op with immediate and {sae} (suppress-all-exceptions);
// register-only, EVEX_B encodes the SAE flag in this reg-reg context.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}
10142
// Instantiate the unary packed-FP-with-imm forms: 512-bit gets both the
// normal and the SAE variant; 256/128-bit (VLX) get only the normal one,
// since SAE is only encodable at 512-bit vector length.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
10159
// Binary packed-FP op with an immediate operand:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT (current rounding mode).
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  // EVEX_B here means embedded broadcast of a single scalar element.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10191
// Three-operand op with an 8-bit immediate, where the destination type may
// differ from the source type:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10215
// Three-operand op with an 8-bit immediate where source and destination
// share one type; extends avx512_3Op_rm_imm8 with the embedded-broadcast
// memory form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10233
// Scalar FP op with an immediate operand:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  // The memory form loads one scalar element and places it in a vector.
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10256
// Packed FP op with immediate and {sae}:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Register-only; EVEX_B encodes the SAE flag in this reg-reg context.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}
10271
// Scalar FP instruction with immediate and suppress-all-exceptions:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  // EVEX_B on a register-register form encodes the {sae} semantics.
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}
10285
// Instantiate the packed-imm forms at all three vector widths. Only the
// 512-bit version gets the {sae} variant (SAE is ZMM-only); 128/256-bit
// versions additionally require VLX.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}
10302
// Instantiate a three-operand reg/mem + imm8 instruction at all vector
// widths, allowing the destination and source element types to differ
// (DestInfo vs. SrcInfo). 128/256-bit forms additionally require VLX.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
10317
// Instantiate a three-operand + imm8 instruction (single element-type info)
// at all vector widths. 128/256-bit forms additionally require VLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                                EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}
10332
// Instantiate both the normal and the {sae} scalar-imm forms. Scalar ops
// always use the XMM scheduling class; the "Z" suffix matches the naming
// convention of the packed instantiators.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}
10341
// Instantiate PS (f32) and PD (f64) flavors of a unary packed-imm + SAE
// instruction; the two flavors may use different opcodes. PD gets VEX_W and
// 64-bit CD8 scaling.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeSAE, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
10352
// Packed unary imm + SAE instructions: VREDUCE (DQI), VRNDSCALE, VGETMANT.
// VRNDSCALE uses distinct PS/PD opcodes (0x08/0x09); the others share one.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;

// Packed binary imm + SAE: VRANGEPS/PD (DQI only).
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Scalar counterparts: VRANGESS/SD, VREDUCESS/SD (DQI), VGETMANTSS/SD.
// All are VEX_LIG (vector length ignored) with tuple type T1 for CD8.
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10392
// VSHUFF/VSHUFI (shuffle of 128-bit lanes). The shuffle node is matched at
// the CastInfo type and bitconverted to the instruction's own type _.VT.
// EVEX2VEXOvrd names the VEX instruction (e.g. VPERM2F128) this can be
// compressed to; the broadcast form has no VEX equivalent so gets none.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Broadcast-from-memory form (EVEX_B): second source is one scalar
  // element splatted across the vector.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10429
// Instantiate the 128-bit-lane shuffle at 512 and 256 bits (no 128-bit form:
// a single lane has nothing to shuffle). Only the 256-bit version can be
// EVEX2VEX-compressed, so the 512-bit instantiation passes "".
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}
10443
// VSHUFF32X4/VSHUFF64X2 (FP) and VSHUFI32X4/VSHUFI64X2 (integer). All match
// X86Shuf128 at the 64-bit-element cast type; the 32-bit-element variants
// differ only in masking granularity and CD8 scale.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10452
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
// Each pattern widens the 128-bit source into a ZMM register via
// INSERT_SUBREG and uses a lane shuffle with immediate 0 (all lanes select
// lane 0) to splat it across all four 128-bit lanes.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

// i16/i8 vectors have no element-sized lane shuffle; reuse the i32 one.
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
10485
// VALIGND/VALIGNQ: concatenate src2:src1 and extract an element-aligned
// window selected by the immediate. rri/rmi can be EVEX2VEX-compressed to
// VPALIGNR forms; the broadcast form (rmbi) cannot.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 imm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  // Broadcast-from-memory form (EVEX_B).
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                              (i8 imm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10515
// Instantiate VALIGN at all vector widths (opcode 0x03 for every width).
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10531
// VALIGND (dword granularity) and VALIGNQ (qword, VEX_W).
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

// VPALIGNR: byte-granularity align, per-128-bit-lane (BWI via the default
// predicate of avx512_common_3Op_rm_imm8).
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10541
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// Each transform rescales the element-shift immediate for the smaller
// element size: qword->dword x2, qword->byte x8, dword->byte x4.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
10553
// Match a vselect-masked align done at the "From" element type and select
// the instruction variant operating at the "To" element type instead,
// rescaling the immediate with ImmXForm. Covers merge-masking (k) and
// zero-masking (kz), with register (rri*) and load (rmi*) sources.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                              (From.LdFrag addr:$src2),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}
10595
// Same as avx512_vpalign_mask_lowering, plus patterns where the second
// source is a broadcast load at the "To" element type (selects the rmbi
// forms). The first, unmasked pattern also retargets a plain broadcast
// align to the To-typed instruction.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}
10632
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // (No broadcast variants here: VPALIGNR has no broadcast form.)
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
10658
// VDBPSADBW: double-block packed sum of absolute differences (i8 sources,
// i16 destination). AVX-512 only, hence NotEVEX2VEXConvertible.
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10662
// Unary operation, register and full-vector-load forms.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}
10680
// Unary operation: rr/rm forms from avx512_unary_rm plus a broadcast-load
// form (EVEX_B).
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
10693
// Instantiate a unary op (no broadcast form) at all vector widths;
// 128/256-bit also require VLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                              EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                              EVEX_V128;
  }
}
10708
// Instantiate a unary op with broadcast form at all vector widths;
// 128/256-bit also require VLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                              EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}
10723
// D (i32) and Q (i64, VEX_W) element-size variants; both support broadcast
// (i32/i64 elements can be embedded-broadcast).
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
10732
// B (i8) and W (i16) element-size variants; no broadcast forms (embedded
// broadcast does not exist for byte/word elements).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
10741
// All four element sizes: D/Q gated on AVX512F, B/W gated on BWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}
10751
// VPABS: packed absolute value for all element sizes.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the operand into a ZMM register, run the 512-bit VPABSQ, then
// extract the original-width result.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
10768
// Use 512bit version to implement 128/256 bit.
// Generic NoVLX widening: insert the narrow source into an undef 512-bit
// register, apply the ZMM instruction, extract the original subregister.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}
10790
// VPLZCNT: count leading zeros per element (CDI).
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10812
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// Both are f32-element unary shuffles in the XS (F3) prefix space.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                      avx512vl_f32_info, HasAVX512>, XS;
}

// VMOVSHDUP duplicates odd-index elements; VMOVSLDUP duplicates even-index
// elements (selection is done by the matched SDNode).
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;
10827
10828//===----------------------------------------------------------------------===//
10829// AVX-512 - MOVDDUP
10830//===----------------------------------------------------------------------===//
10831
// 128-bit MOVDDUP needs its own multiclass: the memory form loads only a
// scalar (ScalarMemOp / CD8VH tuple) and builds the vector via
// scalar_to_vector, unlike the generic full-vector-load unary pattern.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (OpNode (_.VT (scalar_to_vector
                                       (_.ScalarLdFrag addr:$src)))))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}
10847
// Instantiates MOVDDUP across the three vector widths.  The 512/256-bit
// forms use X86Movddup; the 128-bit form matches X86VBroadcast instead,
// since duplicating the low f64 of a v2f64 is a broadcast.
// NOTE(review): the OpNode parameter is not referenced here — the node
// names are hard-coded below; presumably kept for signature symmetry.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10860
// Top-level MOVDDUP wrapper: fixes the element info to f64 and applies the
// XD prefix + VEX.W required by the encoding.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                        avx512vl_f64_info>, XD, VEX_W;
}
10866
// Same 0x12 opcode as VMOVSLDUP; distinguished by the XD prefix and VEX.W.
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10868
// Extra VLX-only selection patterns mapping v2f64 broadcasts (from register,
// plain load, non-volatile vector load, or zero-extending load) onto the
// 128-bit VMOVDDUP forms, including masked (k) and zero-masked (kz)
// variants driven by vselect.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
// Register broadcast: the scalar FR64X source must be placed in a VR128X
// register class before the vector instruction can consume it.
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
          (VMOVDDUPZ128rm addr:$src)>;

// Merge-masked and zero-masked register broadcast.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

// Merge-masked and zero-masked scalar-load broadcast.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

// Merge-masked and zero-masked vector-load broadcast.
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
                   immAllZerosV),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
10901
10902//===----------------------------------------------------------------------===//
10903// AVX-512 - Unpack Instructions
10904//===----------------------------------------------------------------------===//
10905
// FP unpack high/low.  VUNPCKH passes IsCommutable=0, IsKCommutable=1
// (trailing <0, 1> args); VUNPCKL uses the multiclass defaults.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
10910
// Integer unpacks.  Byte/word forms require BWI; dword/qword forms only
// need base AVX-512.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;
10928
10929//===----------------------------------------------------------------------===//
10930// AVX-512 - Extract & Insert Integer Instructions
10931//===----------------------------------------------------------------------===//
10932
// Memory form shared by PEXTRB/PEXTRW: extracts an element (via OpNode),
// truncates the GPR-sized result to the element type and stores it.
// CD8VT1 => compressed disp8 scales by one element.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
10942
// VPEXTRB: register form (0x14, 0F3A map via TAPD) plus the shared
// memory form.  Requires BWI.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
10955
// VPEXTRW: the canonical register form is 0xC5 (PD map, MRMSrcReg); the
// 0x15 MRMDestReg form exists only for the disassembler (rr_REV, no
// patterns) and folds back to rr via FoldGenData.  Memory form is shared.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
10975
// VPEXTRD/VPEXTRQ: dword/qword extracts match the generic extractelt node
// directly (no truncation needed since the element fills the GPR).
// Requires DQI.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                            RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}
10995
// D and Q share opcode 0x16; VEX.W selects the qword form.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
11000
// Memory form shared by all VPINSR*: inserts a scalar loaded via LdFrag
// into $src1 at lane $src3.  CD8VT1 => disp8 scales by one element.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
11010
// VPINSRB/VPINSRW: byte/word inserts take the scalar in a GPR and use the
// target-specific pinsr nodes (BWI required).
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
11024
// VPINSRD/VPINSRQ: dword/qword inserts match the generic insertelt node
// and load via the element's own ScalarLdFrag (DQI required).
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}
11039
// VPINSRD and VPINSRQ deliberately share opcode 0x22; VEX.W selects the
// qword form (matches the hardware encoding).
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11046
11047//===----------------------------------------------------------------------===//
11048// VSHUFPS - VSHUFPD Operations
11049//===----------------------------------------------------------------------===//
11050
// VSHUFPS/VSHUFPD: 3-operand + imm8 shuffle over the FP type info.
// NOTE(review): VTInfo_I is not referenced in the body — only the FP info
// is used; presumably kept for signature symmetry with callers.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}
11058
// PS vs PD prefix (and VEX.W for the f64 form) distinguish the two.
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11061
11062//===----------------------------------------------------------------------===//
11063// AVX-512 - Byte shift Left/Right
11064//===----------------------------------------------------------------------===//
11065
// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Whole-register byte shift by immediate (PSLLDQ/PSRLDQ).  The register
// form uses a ModRM reg-field opcode extension (MRMr), the memory form the
// matching MRMm, both with an i8 shift amount.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 imm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11083
// Instantiates the byte shift at 512/256/128 bits over i8 element info;
// sub-512 forms additionally require VLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
// Both share opcode 0x73; the ModRM /7 vs /3 extension selects left/right.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11103
// PSADBW takes separate source and destination type infos because the
// result type differs from the operand type (e.g. v64i8 inputs, v8i64
// output at 512 bits).
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11123
// Instantiates PSADBW at all three widths with the matching i64-result /
// i8-source type-info pairs; sub-512 forms additionally require VLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}
11137
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11140
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// The VPTERNLOG imm8 is a truth table: result bit for inputs (a,b,c) from
// operands (0,1,2) lives at imm bit index (a<<2)|(b<<1)|c.  Reordering the
// operands therefore permutes the imm bits; each transform below applies
// one such permutation.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1
  // (the bit swaps below exchange the a and b inputs of the truth table).
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end
  // (cyclic rotation of the three operands: 0,1,2 -> 1,2,0).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning
  // (inverse rotation of VPTERNLOG231: 0,1,2 -> 2,0,1).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
11202
// VPTERNLOG: three-source bitwise ternary logic selected by the truth-table
// immediate $src4.  $src1 is tied to $dst, so only src2/src3/imm appear in
// the assembly operand list.  Because the hardware fixes which operand may
// come from memory/broadcast and which is the masked destination, the extra
// Pats below re-match commuted forms of the node and fix up the immediate
// with the VPTERNLOG*_imm8 transforms defined above.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  // The masked passthru must be the tied $src1, so commute the node's
  // operands until it is, remapping the immediate accordingly.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  // Only operand 2 of the instruction may come from memory.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
11371
// Instantiates VPTERNLOG at 512/256/128 bits (opcode 0x25); the sub-512
// forms additionally require VLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}
11384
// D vs Q element size selected by VEX.W (same opcode).
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
11389
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
// One pattern per element type so a NOT of any 512-bit integer vector
// lowers to a single VPTERNLOGQ with truth table 0x0F (= NOT src0).
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
11406
// 128/256-bit vnot without VLX: only the 512-bit VPTERNLOG exists, so widen
// the operand into a ZMM register (INSERT_SUBREG into IMPLICIT_DEF), apply
// the 512-bit instruction, and extract the original width back out. The
// upper elements are garbage but the extracted subregister is correct.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}
11466
// 128/256-bit vnot with VLX available: use the native narrow VPTERNLOG
// directly, no widening dance needed.
let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
11486
11487//===----------------------------------------------------------------------===//
11488// AVX-512 - FixupImm
11489//===----------------------------------------------------------------------===//
11490
// Packed VFIXUPIMM: reg-reg (rri), reg-mem (rmi) and broadcast-mem (rmbi)
// forms. $src1 is tied to $dst (read-modify-write); TblVT is the integer
// table-vector type matching the fp element type _.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 imm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 imm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast form: the table operand is a scalar memory location splatted
    // across the vector (EVEX.b).
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                                    (i32 imm:$src4))>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}
11521
// Packed VFIXUPIMM plus the {sae} (suppress-all-exceptions) register form,
// used only for the 512-bit width. Inherits rri/rmi/rmbi from
// avx512_fixupimm_packed and adds rrib (EVEX.b on a register operand = SAE).
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 imm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}
11538
// Scalar VFIXUPIMM (VFIXUPIMMSS/SD): reg (rri), SAE reg (rrib) and scalar
// memory (rmi) forms. $src1 is tied to $dst; _src3VT is the integer vector
// type holding the fixup table for this element width.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 imm:$src4))>, Sched<[sched]>;
    // Register-only SAE form: use the plain scheduling class, consistent with
    // the packed SAE form — there is no memory operand to fold here.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 imm:$src4))>,
                      EVEX_B, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (X86VFixupimms (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (_src3VT.VT (scalar_to_vector
                                              (_src3VT.ScalarLdFrag addr:$src3))),
                                    (i32 imm:$src4))>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11571
// Instantiate packed VFIXUPIMM at all widths: 512-bit with the extra SAE
// form (AVX512F), 128/256-bit without SAE (requires VLX).
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}
11588
// Scalar (SS/SD, opcode 0x55) and packed (PS/PD, opcode 0x54) VFIXUPIMM
// instantiations; double-precision forms carry VEX_W.
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11599
11600// Patterns used to select SSE scalar fp arithmetic instructions from
11601// either:
11602//
11603// (1) a scalar fp operation followed by a blend
11604//
11605// The effect is that the backend no longer emits unnecessary vector
11606// insert instructions immediately after SSE scalar fp instructions
11607// like addss or mulss.
11608//
11609// For example, given the following code:
11610//   __m128 foo(__m128 A, __m128 B) {
11611//     A[0] += B[0];
11612//     return A;
11613//   }
11614//
11615// Previously we generated:
11616//   addss %xmm0, %xmm1
11617//   movss %xmm1, %xmm0
11618//
11619// We now generate:
11620//   addss %xmm1, %xmm0
11621//
11622// (2) a vector packed single/double fp operation followed by a vector insert
11623//
11624// The effect is that the backend converts the packed fp instruction
11625// followed by a vector insert into a single SSE scalar fp instruction.
11626//
11627// For example, given the following code:
11628//   __m128 foo(__m128 A, __m128 B) {
11629//     __m128 C = A + B;
11630//     return (__m128) {c[0], a[1], a[2], a[3]};
11631//   }
11632//
11633// Previously we generated:
11634//   addps %xmm0, %xmm1
11635//   movss %xmm1, %xmm0
11636//
11637// We now generate:
11638//   addss %xmm1, %xmm0
11639
11640// TODO: Some canonicalization in lowering would simplify the number of
11641// patterns we have to try to match.
// Select AVX-512 scalar fp arithmetic (e.g. VADDSS) when a scalar op on
// element 0 is merged back into the vector via a move-scalar node
// (X86Movss/X86Movsd). Covers plain, merge-masked ($src0 passthru) and
// zero-masked variants, each with register and folded-load forms.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted masked scalar math op with insert via movss and zero masking
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
      (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
          VK1WM:$mask, _.VT:$src1,
          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
      (!cast<Instruction>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}
11702
// Instantiate the scalar-math folding patterns for the four basic fp ops,
// in both single (SS) and double (SD) precision.
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11712
// Same idea for unary scalar math (sqrt): fold the element-0 op + reinsert
// into the intrinsic form of the scalar instruction.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11724
11725//===----------------------------------------------------------------------===//
11726// AES instructions
11727//===----------------------------------------------------------------------===//
11728
// EVEX-encoded VAES at 128/256/512-bit widths. IntPrefix is the base 128-bit
// intrinsic name; the wider forms append "_256"/"_512". Narrow forms need
// VLX+VAES, the ZMM form AVX512F+VAES.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
    }
    let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
11746
// The four AES round instructions (opcodes 0xDC-0xDF).
defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11751
11752//===----------------------------------------------------------------------===//
11753// PCLMUL instructions - Carry less multiplication
11754//===----------------------------------------------------------------------===//
11755
// EVEX-encoded carry-less multiply: ZMM form under AVX512F+VPCLMULQDQ,
// XMM/YMM forms under VLX+VPCLMULQDQ.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases (vpclmullqlqdq etc.) for the immediate-selected quadword forms.
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11773
11774//===----------------------------------------------------------------------===//
11775// VBMI2
11776//===----------------------------------------------------------------------===//
11777
// VBMI2 variable concat-shift, register and full-vector memory forms.
// $src1 is tied to $dst (it is both an input to the concat and the output).
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11796
// Adds the broadcast-memory (EVEX.b) form on top of VBMI2_shift_var_rm.
// Used only by the d/q element widths (w has no broadcast form).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11811
// Instantiate the non-broadcast variable-shift forms at all widths
// (ZMM: VBMI2; XMM/YMM: VBMI2+VLX).
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}
11824
// Instantiate the broadcast-capable variable-shift forms at all widths
// (ZMM: VBMI2; XMM/YMM: VBMI2+VLX).
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// One variable-shift mnemonic family across w/d/q element widths. The word
// form has its own opcode (wOp) and no broadcast; d and q share dqOp and
// support broadcast.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
11846
// Immediate concat-shift family (vpshld/vpshrd): word form via the
// 3op+imm8 rm helper, d/q forms via the common 3op imm8 helper with
// broadcast support.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
11857
// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress (byte/word element compress, added by VBMI2).
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand (byte/word element expand, added by VBMI2).
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11876
11877//===----------------------------------------------------------------------===//
11878// VNNI
11879//===----------------------------------------------------------------------===//
11880
// VNNI dot-product accumulate: register (r), full-vector memory (m) and
// dword-broadcast (mb) forms. $src1 is the tied accumulator.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3))>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (X86VBroadcast
                                             (VTI.ScalarLdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11907
// Instantiate a VNNI op at all widths (i32 element vectors only:
// ZMM: VNNI; XMM/YMM: VNNI+VLX).
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}
11917
// FIXME: Is there a better scheduler class for VPDP?
// The four VNNI dot-product instructions (opcodes 0x50-0x53; *S = saturating).
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
11923
11924//===----------------------------------------------------------------------===//
11925// Bit Algorithms
11926//===----------------------------------------------------------------------===//
11927
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG byte/word population count, plus the lowering helpers that widen
// narrow vectors when VLX is unavailable.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11936
// Single-use guard: only allow folding a masked VPSHUFBITQMB when the shuffle
// node has one use, so the unmasked result is not needed separately.
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;
11941
// VPSHUFBITQMB (bit-gather into a mask register): register and memory forms.
// Output is a k-register (VTI.KRC), so this uses the compare-style maskable
// helper with the _su fragment for the masked pattern.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11963
// Instantiates VPSHUFBITQMB at all three vector widths: ZMM requires only
// BITALG, while the YMM/XMM forms additionally require VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}
11972
// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// (SchedWriteVecIMul is a stand-in; no dedicated class exists yet.)
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11975
11976//===----------------------------------------------------------------------===//
11977// GFNI
11978//===----------------------------------------------------------------------===//
11979
// EVEX-encoded GF2P8MULB (GF(2^8) byte multiply) at all vector widths.
// All forms require BWI in addition to GFNI (byte-element masking needs
// the BWI mask registers); VLX gates the 128/256-bit forms.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}
11992
// VGF2P8MULB: Galois-field byte multiply; byte-granularity compressed
// displacement (CD8<8, CD8VF>) for the memory forms.
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
11996
// GF2P8AFFINE*: inherits the reg-reg and reg-mem immediate forms from
// avx512_3Op_rm_imm8 and adds the broadcast (rmbi) form. Even though the
// destination elements are bytes, the broadcast element is always a qword
// (loadi64 / BcstVTI is an i64 vector type), matching the instruction's
// 8x8 bit-matrix operand.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
}
12011
// Instantiates a GF2P8AFFINE* instruction at all widths. The byte-vector
// VTI is paired with the matching i64 vector info for the qword broadcast
// form (see GF2P8AFFINE_avx512_rmb_imm above).
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}
12024
// GF(2^8) affine transforms (with and without inversion). VEX_W is
// required: the broadcast/matrix operand has qword granularity.
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12031
12032
12033//===----------------------------------------------------------------------===//
12034// AVX5124FMAPS
12035//===----------------------------------------------------------------------===//
12036
// AVX5124FMAPS instructions are defined assembly-only: the pattern lists
// are empty, so instruction selection is handled elsewhere (these are
// matched via intrinsics, not these records). All forms read a tied
// accumulator ($src1 = $dst) and a memory operand. NOTE(review): per the
// ISA, $src2 names a block of 4 consecutive registers; that grouping is
// not expressible in the operand list here — confirm against the
// register-class handling in the backend.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
// Packed fused multiply-add, 4 iterations.
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

// Packed fused negated multiply-add, 4 iterations.
defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

// Scalar single-precision variants (VEX_LIG: EVEX.L ignored).
defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
12063
12064//===----------------------------------------------------------------------===//
12065// AVX5124VNNIW
12066//===----------------------------------------------------------------------===//
12067
// AVX5124VNNIW, assembly-only like the 4FMAPS block above: empty pattern
// lists, tied accumulator ($src1 = $dst), memory-source-only encodings.
// NOTE(review): $src2 denotes a 4-register block per the ISA — not
// expressed in the operand list; confirm backend handling.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
// Word dot-product accumulate, 4 iterations.
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

// Saturating variant.
defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}
12082
// Pseudo spill/reload instructions for a VK16 mask-register pair —
// presumably for VP2INTERSECT's paired-mask results (see KRPC use below);
// expanded to real loads/stores later.
let hasSideEffects = 0 in {
  let mayStore = 1 in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1 in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}
12089
12090//===----------------------------------------------------------------------===//
12091// VP2INTERSECT
12092//===----------------------------------------------------------------------===//
12093
// VP2INTERSECT: computes intersection marks for two vectors, writing the
// result into a PAIR of mask registers (_.KRPC). Provides reg-reg,
// reg-mem, and element-broadcast (rmb, EVEX_B) forms.
multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
  // Register-register form.
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX_4V, T8XD;

  // Full-vector memory form.
  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins  _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;

  // Scalar-broadcast memory form ({1toN}).
  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                             _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
12122
// Instantiates VP2INTERSECT at all widths; 128/256-bit forms need VLX.
multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
  }
}
12132
// Dword and qword element variants (VEX_W selects the 64-bit form).
defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
12135
// Two-input binop with distinct source and destination VT infos, at all
// three widths. Note the compressed displacement is hard-coded to dword
// granularity (EVEX_CD8<32, CD8VF>) at every width — appropriate for the
// single current user, VCVTNE2PS2BF16 (f32 sources), despite the
// generic-looking name.
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}
12158
// VCVTNE2PS2BF16: convert two packed-single sources into one packed BF16
// result (f32 in, i16-typed BF16 out).
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12163
12164// Truncate Float to BFloat16
// Truncate packed single to BFloat16 (VCVTNEPS2BF16). The Z128 form uses
// null_frag so its patterns can be written by hand below (the 128-bit
// conversion needs special masking handling via X86mcvtneps2bf16). The
// InstAliases add explicit "x"/"y" mnemonic suffixes: both the 128- and
// 256-bit source forms produce an XMM result, so the plain mnemonic is
// ambiguous for the assembler.
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let Predicates = [HasBF16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                               X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                    VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                    f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                    VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                    f256mem:$src), 0, "intel">;
  }
}
12193
// Single-source f32 -> BF16 truncating convert.
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;
12197
let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag (see avx512_cvtps2bf16 Z128
  // above), so the rr/rm/rmb forms and their k/kz masked variants are all
  // selected explicitly here.

  // Register-source forms.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  // Full-vector load forms.
  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  // Scalar-broadcast ({1to4}) load forms.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcast (loadf32 addr:$src))))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}
12229
// VDPBF16PS core forms: BF16 dot product accumulated into packed single.
// $src1 is the tied accumulator ($src1 = $dst), so only $src2/$src3
// appear in the assembly operand list. `_` is the f32 result info;
// `src_v` is the i32-typed BF16-pair source info used for the memory
// operands.
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  // Register-register form.
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src2, _.RC:$src3),
                           OpcodeStr, "$src3, $src2", "$src2, $src3",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
                           EVEX_4V;

  // Full-vector memory form.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src2, _.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                               (src_v.VT (bitconvert
                               (src_v.LdFrag addr:$src3)))))>, EVEX_4V;

  // Scalar-broadcast memory form (EVEX_B, {1toN} on $src3).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
                  OpcodeStr,
                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
                  !strconcat("$src2, ${src3}", _.BroadcastStr),
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                  (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
                  EVEX_B, EVEX_4V;

}
} // Constraints = "$src1 = $dst"
12257
// Instantiates the dpbf16ps forms at all widths; VLX gates 128/256-bit.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
                                   src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}
12272
// VDPBF16PS: BF16 pair dot product accumulating into f32 elements.
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12276