// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision d409305fa3838fb39b38c26fc085fb729b8766d5)
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  // Left undefined (?) for NumElts > 16, where no pair class exists.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type as a string, e.g. "i32" or "f64".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand sized for a single element, e.g. i32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  // Full-vector load that requires alignment.
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  // Single-element load and embedded-broadcast load fragments.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  // Scalar-intrinsic memory fragments (sse_load_f32/f64); FP element
  // types only, undefined (?) otherwise.
  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
                                           !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"),
                                     !cast<PatFrags>("sse_load_f64"),
                               ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Subregister index of this vector within a wider register; only
  // defined for the 128-bit and 256-bit sizes.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain, selected from the element type.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // Scalar FP register class matching the element type.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // All-zeros vector of this VT; used as the tied source in zero-masking.
  dag ImmAllZerosV = (VT immAllZerosV);

  // Instruction-name suffix distinguishing the three EVEX vector lengths.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
107
// 512-bit vector type descriptors (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector type descriptors (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
136
// Bundles the 512-, 256- and 128-bit X86VectorVTInfo records for a single
// element type, so VL (vector-length) multiclasses can take one argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo vt512, X86VectorVTInfo vt256,
                           X86VectorVTInfo vt128> {
  X86VectorVTInfo info512 = vt512;
  X86VectorVTInfo info256 = vt256;
  X86VectorVTInfo info128 = vt128;
}
143
// Per-element-type bundles of the three vector lengths (512/256/128 bits).
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;
156
// Describes a mask-register type: the mask register class, its write-mask
// variant, and the corresponding vNi1 value type.
class X86KVectorVTInfo<RegisterClass krc, RegisterClass krcwm,
                       ValueType vt> {
  RegisterClass KRC = krc;
  RegisterClass KRCWM = krcwm;
  ValueType KVT = vt;
}
163
// Mask-type descriptors for each supported mask width (1..64 bits).
def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
171
// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// Scalar counterpart of vselect_mask, built on X86selects; same
// profitability guard.
def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;
183
// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
// It emits three records:
//   NAME    - unmasked form
//   NAME#k  - merge-masking form ("$dst {${mask}}"), tagged EVEX_K
//   NAME#kz - zero-masking form ("$dst {${mask}} {z}"), tagged EVEX_KZ
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
226
227
// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three set-patterns from RHS/MaskingRHS; the zero-masking
// pattern selects between RHS and _.ImmAllZerosV under the mask.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;
248
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          // Merge-masking ins: prepend the $src0 pass-through
                          // (tied to $dst below) and the write-mask.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          // Zero-masking ins: only the write-mask is prepended.
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;
270
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          // Merge-masking ins: $src0 pass-through plus mask.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          // Zero-masking ins: mask only.
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;
288
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Uses X86selects_mask (scalar select) instead of vselect_mask, and marks
// all three forms non-commutable.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;
297
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
// When MaskOnly is set the unmasked pattern is suppressed (null_frag).
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          // Preserved elements come from the tied $src1.
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;
318
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect_mask, "", IsCommutable>;
336
// Scalar variant of AVX512_maskable_3src: identical structure but selects
// with X86selects_mask instead of vselect_mask.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;
347
// Assembly-only variant: provides Pattern for the unmasked form but no
// ISel patterns for the masked/zero-masked forms (empty lists).
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;
358
// Assembly-only 3-source variant: $src1 carries the pass-through elements,
// masked/zero-masked forms get no ISel patterns (empty lists) and no
// extra constraint string.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;
370
// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Emits NAME (unmasked) and NAME#k (merge-masked, EVEX_K); there is no
// zero-masking form since the destination is a mask register.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}
393
// Wraps AVX512_maskable_custom_cmp, building the set-patterns that write
// the mask register class (_.KRC) destination.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
405
// Compare-style maskable instruction: the masked result is the comparison
// ANDed with the incoming mask.  RHS_su is the single-use-constrained form
// of RHS used inside the masked pattern.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
414
// Used by conversion instructions.
// Caller supplies all three result dags explicitly (the source and
// destination VTs of a conversion differ, so they cannot be derived here).
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;
428
// FMA-style maskable instruction: $src1 is tied and provides the preserved
// elements for merge-masking; a separate MaskingRHS is used inside both
// masked patterns.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;
445
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Map the remaining 512-bit all-zeros VTs onto the same pseudo.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
466
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}
482
// 128-bit and 256-bit zero-vector pseudos (same expansion strategy as
// AVX512_512_SET0 above).
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Map the remaining 128/256-bit all-zeros VTs onto the same pseudos.
let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}
503
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}
515
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Emits the register form (rr) and the memory form (rm); the mnemonic is
// derived from the inserted subvector, e.g. "vinsertf32x4".
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
554
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
561
// Codegen-only lowering patterns mapping a vinsert node onto an existing
// instruction (register and load forms); the immediate is rewritten by
// INSERT_get_vinsert_imm.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Register source.
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory source, folded into the rm form.
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
581
// Instantiates every VINSERT variant for one f32/f64 (or i32/i64) pair:
// 32x4 / 64x4 unconditionally, and the DQI-only 64x2 / 32x8 forms with the
// unmasked pattern disabled.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}
625
// Instantiate the floating-point and integer subvector insert families.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
629
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
662
663
// Patterns that allow masking an insert through a bitcast: the vselect_mask
// operates on Cast.VT while the insert itself is performed on To.VT/From.VT.
// Covers merge-masking (rrk/rmk) and zero-masking (rrkz/rmkz), each with a
// register source and a folded-load source.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masked, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masked, load source (with an extra bitconvert on the loaded value).
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masked, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, load source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
715
// Instantiate the masked-insert-through-bitcast patterns for all supported
// (From, To, Cast) type combinations at 256-bit and 512-bit widths.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
793
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
// Register form.
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: loads a scalar f32 and inserts it via scalar_to_vector.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
811
812//===----------------------------------------------------------------------===//
813// AVX-512 VECTOR EXTRACT
814//---
815
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form, with masked variants generated by AVX512_maskable_split.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Unmasked extract-to-memory form.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked extract-to-memory form; no selection pattern attached.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
852
// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper over vextract_for_size_split for the common case where
// the masked and the unmasked forms select on the same pattern operator.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
859
// Codegen pattern for the alternative types.
// Selection-only: reuses the already-defined VEXTRACT instruction named by
// InstrStr. EXTRACT_get_vextract_imm transforms the extract node's index
// into the instruction's immediate operand.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     // Register-register form.
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract folded directly into a store.
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
875
// Instantiates the 32x4/64x4 (and, with DQI, 64x2/32x8) subvector extract
// instructions for one pair of element types, from 256-bit (VLX) and 512-bit
// sources.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // Hence null_frag for the unmasked pattern operator below.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
920
// Instantiate the floating-point and integer subvector extract families.
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
924
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// (Selection patterns only; no new instructions are defined here.)
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
958
959
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Each pattern first narrows to the lower 256 bits (sub_ymm subreg) and then
// extracts the upper 128-bit half of that with a VEX-encoded VEXTRACT*128.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
988
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Same transformation as the NoVLX block above, but using the 256-bit
// VLX VEXTRACT*32x4Z256rr forms.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
1017
1018
1019// Additional patterns for handling a bitcast between the vselect and the
1020// extract_subvector.
1021multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1022                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
1023                                  PatFrag vextract_extract,
1024                                  SDNodeXForm EXTRACT_get_vextract_imm,
1025                                  list<Predicate> p> {
1026let Predicates = p in {
1027  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1028                                   (bitconvert
1029                                    (To.VT (vextract_extract:$ext
1030                                            (From.VT From.RC:$src), (iPTR imm)))),
1031                                   To.RC:$src0)),
1032            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1033                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1034                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1035
1036  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1037                                   (bitconvert
1038                                    (To.VT (vextract_extract:$ext
1039                                            (From.VT From.RC:$src), (iPTR imm)))),
1040                                   Cast.ImmAllZerosV)),
1041            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1042                      Cast.KRCWM:$mask, From.RC:$src,
1043                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1044}
1045}
1046
// Instantiate the masked-extract-through-bitcast patterns for all supported
// (From, To, Cast) type combinations at 256-bit and 512-bit source widths.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1124
// vextractps - extract 32 bits from XMM
// Register form: extracted element goes to a GR32/GR64 register.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

// Memory form: extracted element is stored directly to memory.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1138
1139//===---------------------------------------------------------------------===//
1140// AVX-512 BROADCAST
1141//---
// broadcast with a scalar argument.
// Selection patterns (no new instructions): map X86VBroadcast of an FP scalar
// in SrcInfo.FRC onto the register-form broadcast instructions named
// Name#ZSuffix#rr/rrk/rrkz. The scalar is first moved into the vector
// register class via COPY_TO_REGCLASS so the instruction's operand class
// matches. Note: the opc/OpcodeStr parameters are unused here; they keep the
// template argument list parallel with the instruction-defining multiclasses.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            string Name,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Merge-masked broadcast: unselected lanes come from $src0.
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Zero-masked broadcast: unselected lanes are zeroed.
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
1161
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// Defines the six standard forms of a broadcast instruction:
//   rr/rrk/rrkz  - broadcast from a vector register (unmasked / merge / zero)
//   rm/rmk/rmkz  - broadcast from a scalar memory operand
// MaskInfo describes the type used for masking (may differ from DestInfo;
// the result is bitconverted from DestInfo.VT to MaskInfo.VT in the
// patterns). UnmaskedOp/UnmaskedBcastOp can be overridden (e.g. with
// null_frag) to suppress the unmasked selection patterns. The Name parameter
// is unused here; it exists so wrappers can pass it through uniformly.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Unmasked register form.
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Zero-masking register form ({z} syntax, EVEX_KZ).
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set MaskInfo.RC:$dst,
                         (vselect_mask MaskInfo.KRCWM:$mask,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                          MaskInfo.ImmAllZerosV))],
                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Merge-masking register form: unselected lanes keep $src0, which is tied
  // to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                     "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Unmasked memory form: broadcast a scalar load. EVEX_CD8 scales disp8 by
  // the scalar element size (tuple-1).
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Zero-masking memory form.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set MaskInfo.RC:$dst,
                         (vselect_mask MaskInfo.KRCWM:$mask,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (SrcInfo.BroadcastLdFrag addr:$src)))),
                          MaskInfo.ImmAllZerosV))],
                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Merge-masking memory form.
  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                     "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
1252
// Helper class to force mask and broadcast result to same type.
// Thin wrapper over avx512_broadcast_rm_split that passes DestInfo for both
// the mask type and the broadcast result type (the common, non-32x2 case).
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;
1262
// FP double-precision broadcast: 512-bit form under AVX512, 256-bit form
// under VLX. Unlike the _ss variant there is no 128-bit form (broadcasting
// one f64 into a v2f64 is handled elsewhere). Each width also gets the
// scalar-register patterns from avx512_broadcast_scalar.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}
1281
// FP single-precision broadcast: 512-bit form under AVX512; 256- and 128-bit
// forms under VLX. Each width also gets the scalar-register patterns from
// avx512_broadcast_scalar.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
// VBROADCASTSS (0x18) / VBROADCASTSD (0x19). VEX_W1X on the SD form encodes
// it with EVEX.W=1 (64-bit elements).
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;
1309
// Integer broadcast from a GPR (VPBROADCASTD/Q r forms). AVX512_maskable
// generates the unmasked/merge/zero-masked variants; the last argument
// (vselect) allows mask folding even with multiple uses.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins SrcRC:$src),
                          "vpbroadcast"#_.Suffix, "$src", "$src",
                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                          T8PD, EVEX, Sched<[SchedRR]>;
}
1322
// Byte/word broadcast from a GPR. The instruction itself takes a GR32
// operand (defined with empty patterns via AVX512_maskable_custom); the Pat
// defs below widen the GR8/GR16 source into an undef i32 with INSERT_SUBREG
// before feeding it to the rr/rrk/rrkz instruction.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                         (outs _.RC:$dst), (ins GR32:$src),
                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked broadcast.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked broadcast.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1347
// Vector-length expansion of the byte/word GPR broadcast: Z (512-bit) under
// prd, Z256/Z128 additionally require VLX. Note the instruction name
// (Name#Z*) is passed down so the Pat defs can reference the right variant.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1361
// Vector-length expansion of the dword/qword GPR broadcast: Z (512-bit)
// under prd, Z256/Z128 additionally require VLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1375
// GPR-source broadcasts. B/W forms need BWI; D/Q need only AVX512F.
// Note VPBROADCASTDr and VPBROADCASTQr share opcode 0x7C and are
// distinguished by the REX/EVEX.W bit (VEX_W on the Q form).
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1385
// Vector-length expansion of the integer register/memory broadcasts
// (vpbroadcastb/w/d/q with an XMM or memory source): Z under prd,
// Z256/Z128 additionally under VLX.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd,
                                        bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128,
                                   IsConvertibleToThreeAddress>,
                                  EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V128;
  }
}
1406
// XMM/memory-source broadcasts. B/W need BWI; D/Q need only AVX512F and are
// flagged convertible-to-three-address (last template arg = 1).
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1415
// Subvector broadcast from memory (e.g. vbroadcasti32x4): loads a _Src-sized
// subvector and replicates it across the _Dst register. Memory-only; the
// maskable helper provides rm/rmk/rmkz forms.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (OpNode addr:$src))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1426
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  // AVX512_maskable_split takes the unmasked and masked patterns separately;
  // passing null_frag for the unmasked one means only the rmk/rmkz forms get
  // selection patterns.
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (OpNode addr:$src))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1442
1443//===----------------------------------------------------------------------===//
1444// AVX-512 BROADCAST SUBVECTORS
1445//
1446
// AVX512F 512-bit subvector broadcasts: 32x4 replicates a 128-bit load,
// 64x4 replicates a 256-bit load. EVEX_CD8 sets the disp8 compression
// granule to the subvector size (tuple-4 of the element width).
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1459
let Predicates = [HasAVX512] in {
// Unmasked subvector broadcasts are element-width agnostic, so a single
// instruction flavor covers every 512-bit result type: 64x4 for 256-bit
// loads, 32x4 for 128-bit loads (F* for FP, I* for integer types).
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// Masked forms must match the mask's element width, so a broadcast of the
// "wrong" width is bitcast to the 32-bit (VK16) or 64-bit (VK8) flavor the
// AVX512F instructions provide.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1522
let Predicates = [HasVLX] in {
// 256-bit variants of the 32x4 subvector broadcasts (VLX-only widths).
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Unmasked 128->256 broadcasts: one flavor per domain covers all types.
def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}
1562
let Predicates = [HasVLX, HasDQI] in {
// AVX512DQ 256-bit 64x2 subvector broadcasts. Defined via the _dq variant,
// so only masked selection patterns exist (unmasked cases use the 32x4
// instructions above).
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1589
let Predicates = [HasDQI] in {
// AVX512DQ 512-bit 64x2 and 32x8 subvector broadcasts. Defined via the _dq
// variant, so only masked selection patterns exist (unmasked cases use the
// AVX512F 32x4/64x4 instructions above).
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1639
// 32x2 broadcasts (AVX512DQ): replicate a 64-bit element pair across the
// destination. Uses the split multiclass because the mask type (_Dst, 32-bit
// elements) differs from the broadcast type (_Src, 64-bit elements); passing
// null_frag suppresses the unmasked patterns. 512/256-bit forms only.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V256;
}
1653
// Integer 32x2 broadcast: inherits the 512/256-bit forms and adds the
// 128-bit form (the FP variant, vbroadcastf32x2, has no 128-bit encoding).
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V128;
}
1664
// 32x2 broadcasts: i32 destination info with i64 source info (the 64-bit
// pair being replicated).
defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;
1669
1670//===----------------------------------------------------------------------===//
1671// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1672//---
// Single register-form instruction broadcasting a mask register into a
// vector register (X86VBroadcastm node). No masked or memory forms.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                  EVEX, Sched<[WriteShuffle]>;
}
1680
// Vector-length expansion of the mask-to-vector broadcast: Z under CDI,
// Z256/Z128 additionally under VLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
1690
// VPBROADCASTMW2D: 16-bit mask to dword lanes; VPBROADCASTMB2Q: 8-bit mask
// to qword lanes (EVEX.W=1 via VEX_W).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               avx512vl_i64_info, VK8>, VEX_W;
1695
1696//===----------------------------------------------------------------------===//
1697// -- VPERMI2 - 3 source operands form --
// VPERMI2: the index operand is tied to the destination ($src1 = $dst), so
// the pattern places IdxVT.RC:$src1 as the middle (index) operand of
// X86VPermt2. The _3src_cast wrapper handles the passthru/index having a
// different VT (IdxVT) from the data VT (_), as happens for the FP variants.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  // Register-register form.
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Register-memory form: the second data source is loaded from memory.
  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (_.LdFrag addr:$src3)))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
1718
// VPERMI2 broadcast-memory form (EVEX_B): the second data source is a single
// scalar element loaded and splatted via BroadcastLdFrag.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1733
// Instantiate VPERMI2 (rr/rm/rmb) at 512 bits unconditionally and at
// 128/256 bits when VLX is available.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}
1753
// Byte/word VPERMI2 variant: gated on an extra predicate (BWI or VBMI) and
// with no broadcast (_mb) forms, since byte/word elements have no EVEX
// embedded-broadcast encoding.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>,  EVEX_V256;
  }
}
1769
// VPERMI2 instantiations. The FP forms (PS/PD) still use integer index
// vector infos; the extra bitcast patterns that this requires follow below
// in avx512_perm_i_lowering.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1784
1785// Extra patterns to deal with extra bitcasts due to passthru and index being
1786// different types on the fp versions.
// Masked-with-passthru selection patterns for the three operand forms
// (rrk / rmk / rmbk). In each, the index/passthru register appears wrapped
// in a bitconvert from CastVT, which the plain patterns generated by
// AVX512_maskable_3src_cast do not match.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Register-register masked form.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  // Register-memory masked form.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  // Broadcast-memory masked form.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                 (X86VPermt2 _.RC:$src2,
                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                             (_.BroadcastLdFrag addr:$src3)),
                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}
1814
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS forms are covered here, with the passthru/index cast
// from the corresponding vXi64 type.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1819
1820// VPERMT2
// VPERMT2: unlike VPERMI2, the tied operand ($src1 = $dst) is the first
// data source and the index register is the untied $src2 (the middle
// operand of X86VPermt2).
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Register-memory form: the second data source is loaded from memory.
  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (_.LdFrag addr:$src3))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// VPERMT2 broadcast-memory form (EVEX_B): second data source is a splatted
// scalar load.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1852
// Instantiate VPERMT2 (rr/rm/rmb) at 512 bits unconditionally and at
// 128/256 bits when VLX is available. Mirrors avx512_perm_i_sizes.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>, EVEX_V256;
  }
}
1872
// Byte/word VPERMT2 variant: extra predicate (BWI or VBMI), no broadcast
// (_mb) forms. Mirrors avx512_perm_i_sizes_bw.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
1887
// VPERMT2 instantiations; opcodes and type infos parallel the VPERMI2 set
// above, with the FP forms again taking integer index infos.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1902
1903//===----------------------------------------------------------------------===//
1904// AVX-512 - BLEND using mask
1905//
1906
// Blend-with-mask register/memory forms (unmasked, merge-masked "k", and
// zero-masked "kz"). All pattern lists are empty here; instruction selection
// for these is handled by patterns elsewhere, not attached to these defs.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  // Memory-source forms of the same three masking variants.
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
// Broadcast-memory (EVEX_B) blend forms: rmbk (merge-masked), rmbkz
// (zero-masked) and rmb (unmasked). Only added for the dword/qword element
// instructions (see blendmask_dq below).
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
1975
// Dword/qword-element blends: all forms including broadcast-memory, at 512
// bits unconditionally and at 256/128 bits under VLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                                 EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                      EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                      EVEX_V128;
  }
}
1991
// Byte/word-element blends: require BWI, and have no broadcast-memory forms
// (byte/word elements have no EVEX embedded-broadcast encoding).
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                               EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                  EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                  EVEX_V128;
  }
}
2005
// Blend-with-mask instantiations: FP and dword/qword integer forms share
// blendmask_dq; byte/word integer forms use blendmask_bw.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
2018
2019//===----------------------------------------------------------------------===//
2020// Compare Instructions
2021//===----------------------------------------------------------------------===//
2022
2023// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2024
// Scalar FP compare into a mask register. Provides the intrinsic-style
// forms (rr_Int, rm_Int, rrb_Int with {sae}) plus isCodeGenOnly FRC-based
// forms used for plain scalar setcc-style selection.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  // Register-register intrinsic form; the _su fragment is used for the
  // masked variant generated by AVX512_maskable_cmp.
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // Register-memory intrinsic form.
  let mayLoad = 1 in
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                        timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // Suppress-all-exceptions ({sae}) form; reads MXCSR rather than raising
  // FP exceptions.
  let Uses = [MXCSR] in
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   timm:$cc)>,
                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  // FRC (plain scalar register) forms; not emitted to the assembler tables.
  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          timm:$cc))]>,
                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vcmp", _.Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        timm:$cc))]>,
              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
2082
// Single-use variants of the scalar-compare nodes: these only match when
// the compare result has one use, which gates the masked instruction forms.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
2091
// VCMPSS/VCMPSD (EVEX-encoded scalar compares into mask registers).
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
2102
// Packed integer equality/greater-than compares writing a mask register.
// Pattern lists are empty; only the instruction encodings are defined here.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Merge-masked variants (EVEX_K).
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
2129
// Extends avx512_icmp_packed with broadcast-memory (EVEX_B) forms, unmasked
// (rmb) and merge-masked (rmbk). Only used for dword/qword element types.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               []>, EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2150
// Instantiate the non-broadcast packed-compare forms at all three widths;
// 512-bit under prd, 256/128-bit under prd + VLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2166
// Same width expansion as avx512_icmp_packed_vl but including the
// broadcast-memory forms (for dword/qword compares).
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2182
// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Signed greater-than compare expressed as a setcc with a fixed SETGT
// condition code.
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;
2188
2189// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2190// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
// Equality compares are commutable (IsCommutable = 1); greater-than
// compares are not. Byte/word forms require BWI and lack broadcast forms;
// dword/qword forms use the _rmb_vl variant.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
2225
// VPCMP with explicit condition-code immediate. Frag matches the compare
// in source order; CommFrag is used by the extra Pats to match the compare
// with a loaded left-hand operand, commuting the operands and rewriting the
// immediate via CommFrag.OperandTransform. The _su fragments gate the
// masked (rrik/rmik) forms on the compare having a single use.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Merge-masked variants: the pattern ANDs the mask with the compare.
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Commuted-load patterns: fold a load appearing as the FIRST compare
  // operand by swapping operands and transforming the condition immediate.
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2287
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  // Extends avx512_icmp_cc with the EVEX.b embedded-broadcast memory forms:
  //   rmib  - unmasked compare against a broadcast scalar memory operand
  //   rmibk - the same compare under a writemask (EVEX_K)
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.BroadcastLdFrag addr:$src2),
                                       cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc
                                             (_.VT _.RC:$src1),
                                             (_.BroadcastLdFrag addr:$src2),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Commuted-operand patterns: fold a broadcast load appearing as the FIRST
  // setcc operand by swapping operands and rewriting the immediate with the
  // fragment's OperandTransform.
  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Use CommFrag.OperandTransform here for consistency with the rmib pattern
  // above and with the rmi/rmik patterns in avx512_icmp_cc. All users pass
  // CommFrag/CommFrag_su pairs sharing the same transform
  // (X86pcmpm_imm_commute), so this is behavior-preserving.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2330
// Instantiate the integer compare-with-condition-code instructions for all
// vector lengths: 512-bit requires only `prd`; the 256-/128-bit forms
// additionally require AVX512VL.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, PatFrag CommFrag,
                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}
2346
// Same as avx512_icmp_cc_vl, but uses the _rmb variant so the element types
// that support embedded broadcast (dword/qword) also get rmib/rmibk forms.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, PatFrag CommFrag,
                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}
2362
// Transform a setcc condition code into the VPCMP immediate encoding.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Signed integer compare-to-mask: matches any setcc with a signed (or
// equality) condition code.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Single-use (_su) variant, used under a writemask so the compare is not
// duplicated when its result has other users.
def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                  (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// Unsigned integer compare-to-mask (VPCMPU*): matches setcc with an unsigned
// condition code. Reuses X86pcmpm_imm since the immediate encoding is shared.
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                   (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
2426
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Byte/word compares require BWI and have no broadcast (rmb) memory form.
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                X86pcmpm_commute, X86pcmpm_commute_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

// Dword/qword compares are base AVX-512 and support embedded broadcast.
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2463
// Single-use FP compare-to-mask, used for the masked patterns below.
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

// Swap the operands of a VCMP immediate (low 5 bits carry the predicate).
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;
2473
// FP compare to mask (VCMPPS/VCMPPD): register, memory, and broadcast forms,
// plus patterns that commute a load into the second operand and patterns for
// the X86cmpmm mask intrinsics.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
// These read MXCSR and may raise FP exceptions.
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
                   "vcmp"#_.Suffix,
                   "$cc, $src2, $src1", "$src1, $src2, $cc",
                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                   1>, Sched<[sched]>;

  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                            timm:$cc)>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                (X86any_cmpm (_.VT _.RC:$src1),
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand.
  // The operands are swapped into the instruction and the immediate is
  // rewritten with X86cmpm_imm_commute to preserve the predicate's meaning.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute timm:$cc))>;

  // Patterns for mask intrinsics.
  // X86cmpmm carries the mask as its 4th operand; an all-ones mask selects
  // the unmasked instruction form.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
                                                       _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
                                                       addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
                                                        addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in other operand.
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute  timm:$cc))>;
}
2585
// Comparison-code form (VCMP[EQ/LT/LE/...]) with {sae}: suppress-all-exceptions
// register-register compare, unmasked and masked variants.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Reads MXCSR but, with {sae}, does not raise FP exceptions.
  let Uses = [MXCSR] in
  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1",
                     "$src1, $src2, {sae}, $cc",
                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
                     EVEX_B, Sched<[sched]>;
}
2601
// Instantiate FP compares for all vector lengths. Only the 512-bit form has
// the {sae} variant; 128/256-bit forms additionally require AVX512VL.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
2613
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Patterns to select fp compares with load as first operand.
// Operands are swapped and the VCMP immediate rewritten accordingly.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
2629
2630// ----------------------------------------------------------------
2631// FPClass
2632
2633def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2634                              (X86Vfpclasss node:$src1, node:$src2), [{
2635  return N->hasOneUse();
2636}]>;
2637
2638def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2639                             (X86Vfpclass node:$src1, node:$src2), [{
2640  return N->hasOneUse();
2641}]>;
2642
//handle fpclass instruction  mask =  op(reg_scalar,imm)
//                                    op(mem_scalar,imm)
// Defines rr/rrk (register) and rm/rmk (memory) scalar VFPCLASS forms; the
// k-suffixed forms apply a writemask via EVEX_K.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                              (i32 timm:$src2)))]>,
                      Sched<[sched]>;
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                            (i32 timm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2681
//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
// Defines rr/rrk, rm/rmk (full-width memory), and rmb/rmbk (broadcast) forms,
// plus AT&T aliases that use the x/y/z suffix to disambiguate register and
// broadcast operands from the memory form.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                       (i32 timm:$src2)))]>,
                      Sched<[sched]>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (X86Vfpclass_su (_.VT _.RC:$src1),
                                       (i32 timm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                  (_.VT (_.LdFrag addr:$src1)),
                                  (i32 timm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                                      _.BroadcastStr#", $dst|$dst, ${src1}"
                                                  #_.BroadcastStr#", $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                                                   _.BroadcastStr#", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
                                     (i32 timm:$src2))))]>,
                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}
2760
// Instantiate vector fpclass for all vector lengths; the x/y/z memory suffix
// matches the vector width. 128/256-bit forms also require AVX512VL.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                      _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}
2775
// Instantiate the full VFPCLASS family: packed single/double (opcVec) and
// scalar single/double (opcScalar).
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      sched, prd>,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, prd>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, prd>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}
2792
// VFPCLASS requires AVX512DQ. Opcode 0x66 = vector forms, 0x67 = scalar forms.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
                                      HasDQI>, AVX512AIi8Base, EVEX;
2795
2796//-----------------------------------------------------------------
2797// Mask register copy, including
2798// - copy between mask registers
2799// - load/store mask registers
2800// - copy from GPR to mask register and vice versa
2801//
// KMOV forms: kk (mask-to-mask copy), km (load from memory), mk (store to
// memory). The kk form is a pure register move with no side effects.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}
2818
// KMOV forms between a mask register and a GPR: kr (GPR -> mask) and
// rk (mask -> GPR). Selected via patterns elsewhere, so no patterns here.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}
2831
// KMOVB needs AVX512DQ; KMOVW is base AVX-512; KMOVD/KMOVQ need AVX512BW.
// The GPR forms of KMOVD/KMOVQ use a different prefix (XD) than the
// mask/memory forms, hence the separate defms.
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2852
// GR from/to mask register
// i16/i8 are not legal GPR types on x86-64 selection here, so conversions
// go through a 32-bit register: widen with INSERT_SUBREG on the way in,
// narrow with EXTRACT_SUBREG on the way out.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
// Fold a truncation of the converted mask directly to an 8-bit subregister
// extract instead of materializing the i16 first.
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of a 16-bit mask conversion can use KMOVWrk directly, since KMOVW
// into a GPR zeroes the upper bits of the 32-bit destination.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
// anyext does not care about the upper bits, so a plain regclass copy works.
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

// Same for 8-bit masks; the zext forms need KMOVB and hence DQI.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

// 32/64-bit masks match the GPR width exactly, so only a regclass copy is
// needed in either direction.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
// Sub-byte masks are loaded with a full byte KMOVB; the bits beyond the
// mask width are don't-cares for the narrow mask type.
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// Without DQI there is no KMOVB, so an 8-bit mask load goes through a
// zero-extending GPR load instead.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}
2909
// SDNode wrapper for EXTRACT_VECTOR_ELT restricted to extracting an i8 from
// a vXi1 mask vector; used by the GPR-copy lowering patterns below.
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;
2914
let Predicates = [HasAVX512] in {
  // Lowerings between GPRs and mask registers for scalar_to_vector and
  // element-0 extraction, instantiated for every mask width.  All of these
  // are pure regclass copies (plus sub-register adjustment for i8).
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    // i8 source must be widened to 32 bits before the regclass copy.
    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    // anyext of the extracted element needs no subregister narrowing at all.
    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Inserting a single scalar bit into a zero v16i1: mask the GPR down to
  // bit 0 with AND, then move it into a mask register with KMOVW.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri8
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}
2944
// Mask unary operation
// - KNOT
// Emits one reg-to-reg unary mask instruction for a single mask width.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}

// Instantiates the B/W/D/Q variants with their respective predicates
// (B needs DQI, W the base AVX512, D/Q need BWI) and encodings.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2972
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte mask types are never legal, so vnot on them is always lowered by
// widening to v16i1, performing KNOTW, and narrowing the result back to the
// source's own register class.  The inverted upper bits of the KNOTW result
// are don't-cares for the narrow mask type.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// Fixed copy-paste typo: the v1i1 result previously narrowed to VK2; it must
// narrow back to VK1 to match its own type, as the sibling patterns do.
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2984
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
// Emits one reg-reg-reg binary mask instruction for a single mask width.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched, Predicate prd,
                           bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// Instantiates the B/W/D/Q variants.  prdW is overridable so KADD can
// require DQI for its word form while the others use plain AVX512.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3024
// Lowers a binary mask operation on a type narrower than 16 bits by widening
// both operands to v16i1, applying the 16-bit instruction, and narrowing the
// result back to the original mask register class.
multiclass avx512_binop_pat<SDPatternOperator VOpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

// Instantiate the narrow-mask lowerings for each logic op using the
// corresponding word-sized K instruction.
defm : avx512_binop_pat<and,   KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or,    KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   KXORWrr>;
3055
// Mask unpacking
// Emits one KUNPCK instruction (concatenating two Src-width masks into one
// Dst-width mask) plus the pattern that selects it for concat_vectors.
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    // Note the operand swap: concat_vectors places $src1 in the low half,
    // while KUNPCK places its second operand in the low half.
    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3075
// Mask bit testing
// Emits one flag-setting mask-compare instruction (no register result;
// only EFLAGS is defined) for a single mask width.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}

// Instantiates the B/W/Q/D variants with their predicates and encodings.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3103
// Mask shift
// Emits one immediate-shift mask instruction for a single mask width.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 !strconcat(OpcodeStr,
                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
                 Sched<[sched]>;
}

// Instantiates W/B (opcode opc1) and Q/D (opcode opc2) variants.  B needs
// DQI and Q/D need BWI; W is available with base AVX512.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3132
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Without VLX the 128/256-bit compare instructions are unavailable, so the
// narrow operands are widened with INSERT_SUBREG (upper lanes undef), the
// 512-bit compare is issued, and the narrow mask is extracted via a regclass
// copy.  (Note: "axv512" is a long-standing misspelling of "avx512".)
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Unmasked compare-with-condition-code.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked form: the 'and' with the mask is folded into the zeroing-masked
// wide compare (Zrrik); Frag_su guards against multi-use compares.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
}
3156
// Broadcast-memory variants of the no-VLX integer compare lowering above.
// CommFrag/CommFrag_su match the operand-swapped form (broadcast on the
// left), handled by transforming the condition code.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     PatFrag CommFrag, PatFrag CommFrag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked broadcast-load compare; the 'and' folds into the masked (Zrmibk)
// instruction form.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                    (Narrow.VT Narrow.RC:$src1),
                                    cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;

// Masked commuted broadcast-load compare.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                             (Narrow.VT Narrow.RC:$src1),
                                             cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
}
3199
// Same as above, but for fp types which don't use PatFrags.
// FP compares take a literal timm condition code; the commuted broadcast
// forms remap the condition with X86cmpm_imm_commute instead of swapping
// instruction operands.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
// Unmasked register-register compare.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

// Masked register-register compare: the 'and' folds into the masked form.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

// Masked broadcast load.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

// Masked commuted broadcast load.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}
3253
// Instantiate the no-VLX compare lowerings.  32/64-bit element compares
// need only base AVX512; 8/16-bit element compares need BWI.  Each narrow
// type (128- and 256-bit) is widened to the corresponding 512-bit type.
let Predicates = [HasAVX512, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  // Broadcast-memory forms.
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  // FP compares (VCMPPS/VCMPPD); the multiclass includes broadcast forms.
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

// Byte/word element compares require BWI for the 512-bit instructions.
let Predicates = [HasBWI, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3298
// Mask setting all 0s or 1s
// These are pseudo instructions (expanded later) marked rematerializable and
// as cheap as a move so the allocator can freely re-emit them.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}

// W/D/Q variants; narrower masks are handled via the patterns below.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros/all-ones for the sub-word mask types reuse the 16-bit pseudo
// and narrow the result with a regclass copy.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3328
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0 both operations are no-ops bit-wise, so each lowers to a bare
// COPY_TO_REGCLASS between the narrow and wide mask register classes.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  // Only the undef-destination insert is handled here; inserting into a
  // defined vector requires real bit manipulation.
  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate for every (narrow, wide) mask-width pair.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3364
3365//===----------------------------------------------------------------------===//
3366// AVX-512 - Aligned and unaligned load and store
3367//
3368
// Emits the full set of EVEX load/move forms for one vector type: rr, rm,
// and their merge-masked (rrk/rmk) and zero-masked (rrkz/rmkz) variants,
// plus patterns mapping masked loads (mload) onto the masked memory forms.
// NoRMPattern suppresses the plain rm selection pattern (used when another
// instruction should own that pattern); SelectOprr lets the caller choose
// the select node matched by the register-masked forms.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  // Plain reg-to-reg move; no pattern, selected as a copy.
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked reg-to-reg: selects src where mask is set, zero elsewhere.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  // Unmasked load; rematerializable and foldable as a load.
  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie the pass-through operand $src0 to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect_mask _.KRCWM:$mask,
                          (_.VT (ld_frag addr:$src1)),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  // Zero-masked load.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // Masked-load node with undef pass-through: use the zeroing form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked-load node with all-zeros pass-through: also the zeroing form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked-load node with a register pass-through: merging form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3435
// Instantiates avx512_load for aligned loads at all three vector lengths:
// 512-bit under predicate 'prd', and 256/128-bit additionally under HasVLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  // ZMM form has no VEX counterpart, so the EVEX2VEX override is empty.
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}
3454
// Instantiates avx512_load for unaligned loads at all three vector lengths:
// 512-bit under predicate 'prd', and 256/128-bit additionally under HasVLX.
// SelectOprr is threaded through to control the reg-reg masked-move patterns.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  // ZMM form has no VEX counterpart, so the EVEX2VEX override is empty.
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3474
// Emits the store/move family for one vector width:
//   rr_REV / rrk_REV / rrkz_REV - codegen-only MRMDestReg encodings of the
//       store opcode; FoldGenData links them to the load-form twins so the
//       disassembler/assembler can round-trip both encodings.
//   mr / mrk - unmasked and merge-masked stores to memory
// plus a pattern mapping the masked-store PatFrag (mstore) onto mrk, and
// ".s" InstAliases that force the store-form (reversed-operand) encoding.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  // Unmasked store.  NoMRPattern suppresses the selection pattern (used when
  // another instruction is preferred for plain stores of this type).
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked store; selected only through the mstore pattern below.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  // "<mnemonic>.s" spellings let assembly writers pick the store-form
  // encoding for reg-reg moves explicitly.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
3529
// Instantiates avx512_store for unaligned stores at all three vector lengths:
// 512-bit under predicate 'prd', and 256/128-bit additionally under HasVLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  // ZMM form has no VEX counterpart, so the EVEX2VEX override is empty.
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3547
// Instantiates avx512_store for aligned stores at all three vector lengths:
// 512-bit under predicate 'prd', and 256/128-bit additionally under HasVLX.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  // ZMM form has no VEX counterpart, so the EVEX2VEX override is empty.
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3566
// FP aligned moves (opcodes 0x28/0x29).
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// FP unaligned moves (opcodes 0x10/0x11).  null_frag disables the reg-reg
// masked-move patterns for the load side.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Integer aligned moves (opcodes 0x6F/0x7F).  The NoRM/NoMR '1' flags
// suppress plain load/store patterns where another form is preferred.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Integer unaligned moves; the byte/word variants require BWI.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3630
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
// Load pseudos: 128-/256-bit destination, expanded to a full-width move
// after register allocation.
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Matching store pseudos.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}
3656
// Select-to-zero with the operand order (zeros, src): implemented by
// inverting the mask with KNOT and using a zero-masked move.  The v8i1 mask
// is widened to VK16 for the 16-bit KNOTWrr and copied back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3676
// Lowers a masked select on a narrow (128/256-bit) vector by widening both
// operands into a Wide (512-bit) register, performing the masked move there,
// and extracting the narrow result.  Used when VLX is unavailable (see the
// instantiations below).  The high lanes fed by IMPLICIT_DEF are discarded
// by the final EXTRACT_SUBREG.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge-masking: select(mask, src1, src0).
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zero-masking: select(mask, src1, 0).
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}
3698
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit elements widen to the 16-element 512-bit forms.
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  // 64-bit elements widen to the 8-element 512-bit forms.
  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte/word element masking needs BWI for the wide masked moves.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3720
// Plain (unmasked) 512-bit integer loads/stores of every element width are
// all lowered to the 64-bit-element move instructions; element size is
// irrelevant for a full-register move, so one canonical opcode is used.
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3750
// Same as above for the 128-/256-bit register sizes, available with VLX.
// All integer element widths use the 64-bit-element move instructions.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3808
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
// vmovd GR32 -> XMM (element 0, upper elements zeroed by scalar_to_vector).
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd m32 -> XMM.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq GR64 -> XMM.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form exists for disassembly only; no selection pattern.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// GPR <-> scalar-FP-register bitcasts, codegen-only helpers.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt
3843
// Move Int Doubleword to Single Scalar
//
// Codegen-only GR32 -> FR32X bitcast (vmovd encoding).
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3852
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
// vmovd XMM(elt 0) -> GR32.
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
// vmovd XMM(elt 0) -> m32.
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
3868
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
// vmovq XMM(elt 0) -> GR64.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// 0x7E store form exists for disassembly only; stores go through the 0xD6
// encoding below instead.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// vmovq XMM(elt 0) -> m64 (0xD6 encoding; this is the selected store form).
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Reg-reg form of the 0xD6 encoding, disassembly only.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

// "vmovq.s" selects the store-form (reversed-operand) encoding explicitly.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
3907
// Move Scalar Single to Double Int
//
// Codegen-only FR32X -> GR32 bitcast (vmovd encoding).
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3917
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
// vmovq m64 -> XMM (element 0, upper element zeroed by scalar_to_vector).
// NOTE(review): CD8 tuple is <8, CD8VT8> (8 x 1-byte) rather than
// <64, CD8VT1>; both yield an 8-byte disp8 scale.
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt
3928
// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

// Conversions between masks and scalar fp.
// f32 <-> v32i1 goes through a GR32 via vmovd + kmovd.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

// f64 <-> v64i1 goes through a GR64 via vmovq + kmovq.
def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3945
3946//===----------------------------------------------------------------------===//
3947// AVX-512  MOVSS, MOVSD
3948//===----------------------------------------------------------------------===//
3949
// Emits the scalar-move (vmovss/vmovsd) family for one scalar type:
//   rr / rrk / rrkz - merge low element between vectors (reg-reg pattern
//       only under OptForSize; blends are preferred otherwise)
//   rm / rm_alt     - load with zero-extend (vector dest / FR dest)
//   rmk / rmkz      - masked loads, encoding-only (no patterns here)
//   mr / mrk        - scalar store, unmasked and masked
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _> {
  // The reg-reg pattern is restricted to OptForSize.
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  // Load into a full vector register, zeroing the upper elements
  // (vzload_frag).
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  // Masked load encodings; no selection patterns attached here.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Scalar store from an FR register.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked scalar store (k1-masked, VK1WM); no pattern attached here.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}
4011
// Instantiate the scalar-move multiclass for the EVEX-encoded scalar moves:
// VMOVSS (f32, XS prefix) and VMOVSD (f64, XD prefix + VEX.W).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4017
4018
// Lower a scalar move (OpNode) whose inserted element is an X86selects of two
// FR32X/FR64X values into the masked forms of the move instruction, copying
// the scalar FP operands into the 128-bit vector register class first.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// Merge-masking form: the select's false value $src2 is placed in the
// tied pass-through ($src0) slot of the rrk instruction.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

// Zero-masking form: the select's false value is the FP zero PatLeaf, so the
// rrkz instruction (which zeroes on a false mask) is used instead.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4043
// Lower a masked_store of a 128-bit vector that was widened to 512 bits
// (insert_subvector at index 0) into the masked scalar store (mrk), copying
// the GPR mask directly into the VK1WM mask register class.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}
4056
// Same as avx512_store_scalar_lowering, but for masks held in a GPR narrower
// than 32 bits: the mask is widened to i32 via INSERT_SUBREG into an
// IMPLICIT_DEF before being copied to VK1WM.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}
4071
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {

// AVX512F pattern: the 128-bit store appears widened to 512 bits with the
// widened mask shape in Mask512.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern: the 128-bit masked store is legal, so match it directly
// with the narrow mask shape in Mask128.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}
4096
// Lower a 512-bit masked_load whose low 128 bits are extracted into the
// masked scalar load forms: rmkz when the pass-through is all-zeros, rmk when
// the pass-through is the zero-extended low element of an existing register.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
4119
// Same as avx512_load_scalar_lowering, but the mask GPR is narrower than 32
// bits and must be widened to i32 with INSERT_SUBREG before the copy to VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Zero pass-through -> zero-masked load (rmkz).
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Register pass-through (low element of $src, upper bits zero) -> merge-
// masked load (rmk).
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
4144
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns (load widened to 512 bits; mask shape in Mask512).
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns (128-bit masked load is legal; mask shape in Mask128).
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}
4185
// Instantiate the scalar move/store/load lowering patterns for VMOVSS/VMOVSD.
// The differing mask dags below describe the concrete shapes in which a
// single-bit mask appears after legalization: bit 0 of a GR32/GR16/GR8
// isolated with an 'and', bitcast to the vXi1 mask type, and (for the
// *_subreg2 variants) round-tripped through insert/extract_subvector as
// produced when codegen widens a 128-bit masked op to 512 bits.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4252
// Select scalar f32/f64 X86selects through the masked VMOVSS/VMOVSD forms.
// Register/register select: the false value goes in the tied pass-through
// operand; IMPLICIT_DEF fills the unused non-masked source.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// False value is +0.0 -> use the zero-masking form.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// True value comes from memory -> masked load forms (rmk/rmkz).
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                                                       VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

// Same four cases for f64 via VMOVSD.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                                                       VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


// 128-bit vector selects: only the low element differs after the masked move,
// so $src1 is passed as both the pass-through-adjacent and masked source.
def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4299
// Reversed-encoding (MRMDestReg, opcode 0x11) register forms of VMOVSS/VMOVSD.
// These are codegen-only, used for disassembly (ForceDisassemble) and linked
// to the forward 0x10 forms via FoldGenData for load-folding tables.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}
4352
// Assembler aliases: the ".s" mnemonic suffix selects the reversed (_REV)
// encodings. EmitPriority 0 keeps them out of the default printed form.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
4373
// When optimizing for size, implement X86vzmovl (move low element, zero the
// rest) with VMOVSS from a zeroed register; wider types go through the low
// xmm subregister and SUBREG_TO_REG.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}
4399
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  // Blend immediate 1 keeps only element 0 from $src (f32 lanes).
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  // VPBLENDW works on i16 lanes, so immediate 3 covers one i32 element.
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}
4414
// Scalar FP loads into vector registers via VMOVSS/VMOVSD load forms; the
// instruction zeroes the upper elements, so wider zero-extending loads
// (X86vzload32/64) only need SUBREG_TO_REG around the 128-bit load.
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
4435
// vmovq xmm, xmm: moves the low 64-bit element and zeroes the upper element
// (matches X86vzmovl on v2i64).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}
4444
// Integer scalar-to-vector moves and zero-extending loads, plus X86vzmovl on
// wider types implemented through the 128-bit vmovq (VMOVZPQILo2PQIZrr).
let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // X86vzmovl on 256/512-bit types: vmovq the low xmm, re-widen with
  // SUBREG_TO_REG (upper bits are zeroed by the 128-bit instruction).
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}
4492
4493//===----------------------------------------------------------------------===//
4494// AVX-512 - Non-temporals
4495//===----------------------------------------------------------------------===//
4496
// 512-bit non-temporal aligned load. Pattern list is empty; the load patterns
// are attached separately below with AddedComplexity = 400.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4501
// 256/128-bit non-temporal aligned loads (require VLX). Patterns are
// attached separately below.
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4515
// One non-temporal store instruction (mr form) for a given vector type.
// AddedComplexity = 400 makes the NT pattern win over ordinary store patterns.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4525
// Instantiate avx512_movnt at all three vector lengths: ZMM under AVX512F,
// YMM/XMM additionally require VLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}
4537
// Non-temporal stores: integer (vmovntdq) and packed FP (vmovntpd/vmovntps).
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
4544
// 512-bit non-temporal store/load patterns for the element types not covered
// by the instruction defs above; AddedComplexity = 400 matches the NT
// instructions' own priority.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4566
// 256/128-bit non-temporal store/load patterns (VLX) for the remaining
// element types.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4608
4609//===----------------------------------------------------------------------===//
4610// AVX-512 - Integer arithmetic
4611//
// Maskable integer binary op for one vector width: defines the
// register-register (rr) and register-memory (rm) forms.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  // reg, reg form.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  // reg, mem form (full-vector load folded into src2).
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4629
// avx512_binop_rm plus the EVEX.b broadcast-from-scalar-memory (rmb) form,
// for element types that support embedded broadcast (dword/qword).
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  // reg, broadcast-mem form: src2 is a scalar splatted to the full vector.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4643
// Instantiate avx512_binop_rm at all three vector lengths: the 512-bit (Z)
// form under 'prd', the 256/128-bit (Z256/Z128) forms under prd + VLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4659
// Same as avx512_binop_rm_vl but uses avx512_binop_rmb, so each length also
// gets the embedded-broadcast (rmb) form.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4675
// Qword-element binop: i64 vectors, VEX_W set, CD8 scale for 64-bit elements.
// Broadcast form is available (rmb_vl).
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
4683
// Dword-element binop: i32 vectors, CD8 scale for 32-bit elements.
// Broadcast form is available (rmb_vl).
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4690
// Word-element binop: i16 vectors, W-bit ignored (VEX_WIG). No broadcast
// form — embedded broadcast only exists for 32/64-bit elements.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
4698
// Byte-element binop: i8 vectors, W-bit ignored (VEX_WIG). No broadcast
// form — embedded broadcast only exists for 32/64-bit elements.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
4706
// Define both dword ("<op>d") and qword ("<op>q") variants of one binop,
// each with its own opcode.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                   IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                   IsCommutable>;
}
4716
// Define both byte ("<op>b") and word ("<op>w") variants of one binop,
// each with its own opcode.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                   IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                   IsCommutable>;
}
4726
// All four element widths of one binop. The d/q variants need only AVX512F;
// the b/w variants additionally require BWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
4737
// Binop whose source and destination vector types differ (e.g. widening or
// narrowing ops). _Brdct supplies the element type used for the embedded
// broadcast form, which may differ from _Src's.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  // reg, reg form.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  // reg, mem form (full-vector load of the source type).
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  // reg, broadcast-mem form: broadcast in _Brdct's element type, then
  // bitconvert to the source type before applying the op.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Brdct.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4769
// Add/subtract, all element widths (commutable add, non-commutable sub).
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
// Saturating add/subtract, signed and unsigned, byte/word only (BWI).
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
// Low-half multiplies; vpmullq requires DQI and has no VEX equivalent.
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
// High-half word multiplies (signed, unsigned, rounded-scaled).
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
// Byte/word averages.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
// Widening dword-to-qword multiplies (even elements).
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
4801
// Instantiate avx512_binop_rm2 at all three vector lengths. The broadcast
// element type is fixed to i64 (v8i64/v4i64/v2i64) at each width.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
4823
// VBMI multishift: byte-granular select from qword source (opcode 0x83).
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;
4827
// Broadcast-memory (rmb) form for pack-style ops: the scalar is broadcast in
// the source element type and bitconverted before the op.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Src.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4841
// rr and rm forms for pack-style ops with distinct source/destination types.
// CD8 scaling follows the source element size.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  // reg, reg form.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  // reg, mem form.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (_Src.LdFrag addr:$src2)))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
}
4862
// i32 -> i16 pack at all vector lengths; dword sources allow the embedded
// broadcast (rmb) form as well. Requires BWI (VLX for 128/256-bit).
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 pack at all vector lengths. No rmb form: word elements do not
// support embedded broadcast.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
4897
// Multiply-add style ops (vpmaddwd / vpmaddubsw): narrower source type,
// wider destination type, instantiated at all three lengths under BWI.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
4914
// Pack with signed/unsigned saturation, dword->word and word->byte.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

// Multiply-add: u8*s8->i16 pairs (vpmaddubsw) and i16*i16->i32 pairs
// (vpmaddwd, commutable).
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4924
// Packed signed max.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// Packed unsigned max.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// Packed signed min.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// Packed unsigned min.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
4964
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern widens the operands into a ZMM register via
// INSERT_SUBREG/IMPLICIT_DEF, runs the 512-bit instruction, and extracts
// the original-width result back out.
let Predicates = [HasDQI, NoVLX] in {
  // 256-bit reg-reg and reg-broadcast forms.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  // 128-bit reg-reg and reg-broadcast forms.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}
4993
// Lower 128/256-bit i64 min/max via the 512-bit instruction when VLX is not
// available: widen operands into ZMM, apply Instr, extract the subregister.
// Instr names the 512-bit instruction stem; "rr"/"rmb" suffixes are appended.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit reg-reg.
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  // 256-bit reg-broadcast.
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  // 128-bit reg-reg.
  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  // 128-bit reg-broadcast.
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}
5021
// Widening lowerings for the qword min/max instructions when VLX is absent.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
5028
5029//===----------------------------------------------------------------------===//
5030// AVX-512  Logical Instructions
5031//===----------------------------------------------------------------------===//
5032
// Bitwise logic: only d/q forms exist (the d and q opcodes are identical;
// the element width matters solely for masking and broadcast). vpandn is
// not commutable since it complements its first operand.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
5041
// Bitwise logic is element-size agnostic, so byte/word vector types are
// selected through the qword-flavored instructions defined above.
let Predicates = [HasVLX] in {
  // 128-bit reg-reg forms.
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  // 128-bit reg-mem forms (full-vector loads folded into src2).
  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  // 256-bit reg-reg forms.
  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  // 256-bit reg-mem forms.
  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}
5123
// 512-bit byte/word logic ops, likewise routed through the qword-flavored
// instructions (bitwise logic is element-size agnostic).
let Predicates = [HasAVX512] in {
  // reg-reg forms.
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  // reg-mem forms (full-vector loads folded into src2).
  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}
5165
// Patterns to catch vselect with different type than logic op.
// A masked select whose value operand is a bitcast of a logic op performed
// in a different element type (IntInfo) is folded directly into the masked
// form of the logic instruction, since bitwise ops ignore element width.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                    X86VectorVTInfo _,
                                    X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  // Merge-masking: lanes where the mask is clear keep $src0.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  // Zero-masking: lanes where the mask is clear become zero.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}
5197
// Same as avx512_logical_lowering, but for broadcast-from-memory operands:
// fold the broadcast load into the masked rmb form of the logic instruction.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  // Merge-masking variant.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  // Zero-masking variant.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
5217
// Instantiate the masked-select lowering patterns at all three vector
// widths; the 128/256-bit forms require the VLX feature.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}
5232
// Broadcast counterpart of avx512_logical_lowering_sizes: instantiate the
// broadcast-folding patterns at all three vector widths (VLX for 128/256).
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}
5247
// Instantiate the masked-select lowering for every combination of select
// element type and logic-op element type. The D-suffixed instructions are
// used when the select has 32-bit elements (i32/f32), the Q-suffixed ones
// when it has 64-bit elements (i64/f64); the logic op itself may have been
// performed in any integer element type.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  // Broadcast folding for float selects over same-width integer logic ops.
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}
5292
// Hook up the masked-select lowering patterns for each bitwise logic node.
defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR",  or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5297
5298//===----------------------------------------------------------------------===//
5299// AVX-512  FP arithmetic
5300//===----------------------------------------------------------------------===//
5301
// Scalar FP binop. rr_Int/rm_Int are the maskable forms operating on the
// full vector register class (used for intrinsics and masked selects,
// matching VecNode); the isCodeGenOnly rr/rm forms operate on the scalar FP
// register class and pattern-match plain scalar arithmetic (OpNode). All
// forms read MXCSR and may raise FP exceptions.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Codegen-only FRC forms for plain (non-intrinsic) scalar arithmetic.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
5335
// Scalar FP binop variant with an explicit embedded-rounding operand ($rc),
// encoded via EVEX.b + EVEX.RC.
// NOTE(review): the IsCommutable parameter is currently unused in this body.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP binop with a {sae} (suppress-all-exceptions) variant in addition
// to the normal register/memory forms. EVEX2VexOvrd names the VEX opcode the
// EVEX-to-VEX compression pass may rewrite the codegen-only rr/rm forms to.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // Codegen-only FRC forms for plain (non-intrinsic) scalar arithmetic.
  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]>,
                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>,
                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }

  // {sae} form: exceptions suppressed, encoded with EVEX.b in register form.
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}
5392
// Instantiate the f32 (SS) and f64 (SD) scalar forms together with their
// embedded-rounding variants.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
5407
// Instantiate the f32 (SS) and f64 (SD) scalar forms together with their
// {sae} variants (used for min/max, which take no rounding mode).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
                              NAME#"SS">,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
                              NAME#"SD">,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic. ADD/MUL/SUB/DIV get embedded-rounding variants;
// MIN/MAX get {sae} variants instead.
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;
5432
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// (codegen-only FRC forms; the multiclass name keeps its historical
// "comutable" spelling).
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched,
                                    string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
// Commutative (X86fminc/X86fmaxc) scalar min/max instantiations.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5473
// Packed FP binop with reg/reg (rr), reg/mem (rm) and reg/broadcast-mem
// (rmb) forms. AVX512_maskable_split takes separate patterns for the
// unmasked (OpNode) and masked (MaskOpNode) cases, so callers can pass
// different nodes for the two (e.g. a strict-capable node unmasked).
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
                  IsKCommutable, IsKCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast form: a single scalar element is loaded and splatted.
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                     "${src2}"#_.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_.BroadcastStr,
                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}
5506
// Packed FP binop with an explicit embedded-rounding operand ($rc),
// encoded via EVEX.b + EVEX.RC; register-register form only.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
5517
// Packed FP binop with a {sae} (suppress-all-exceptions) variant, encoded
// with EVEX.b in register form; register-register form only.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
5528
// Instantiate a packed FP binop at all vector widths: 512-bit under the
// given predicate, 128/256-bit additionally gated on VLX. IsPD128Commutable
// lets the 128-bit PD form be commutable independently of the others.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

    // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
5560
// Embedded-rounding variants exist only at 512-bit width (ZMM).
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5571
// {sae} variants exist only at 512-bit width (ZMM).
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5582
// Packed FP arithmetic. The unmasked pattern uses the strict-capable any_*
// node while the masked pattern uses the plain node.
defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutative min/max variants used when FP ordering doesn't matter.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
// Bitwise logic ops don't read MXCSR and can't raise FP exceptions;
// override the Uses/mayRaiseFPException set by avx512_fp_packed.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
}
5617
// Packed VSCALEF: reg/reg, reg/mem and reg/broadcast-mem maskable forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5639
// Scalar VSCALEF: maskable reg/reg and reg/mem forms on the full vector
// register class (intrinsic-style operands).
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5655
// Instantiate VSCALEF at all widths: packed 512-bit with rounding variants,
// scalar SS/SD (separate opcode) with rounding variants, and VLX-gated
// 128/256-bit packed forms (no embedded rounding below 512-bit).
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
                              EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEF is EVEX-only (no VEX counterpart), hence NotEVEX2VEXConvertible.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5687
5688//===----------------------------------------------------------------------===//
5689// AVX-512  VPTESTM instructions
5690//===----------------------------------------------------------------------===//
5691
// VPTESTM-family compare-to-mask: reg/reg and reg/mem forms writing a mask
// register. Patterns are deliberately null_frag (see NOTE below).
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5712
// Broadcast-memory form of the vptest instructions: $src2 is a scalar memory
// operand splatted across the vector (EVEX_B).  Like avx512_vptest, patterns
// are omitted (selection is done manually in X86ISelDAGToDAG).
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5724
// Expand one D/Q-element vptest over the three vector widths: the 512-bit
// form under HasAVX512, the 256/128-bit forms additionally under HasVLX.
// D/Q element sizes also get the broadcast (_mb) form.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}
5739
// Dword and qword variants of vptest; the qword form sets VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                 avx512vl_i64_info>, VEX_W;
}
5747
// Byte and word variants of vptest, gated on AVX512BW (plus VLX for the
// 128/256-bit widths).  Note there is no broadcast (_mb) form here, unlike
// the D/Q variants.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                            v64i8_info, NAME#"B">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info, NAME#"B">, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info, NAME#"B">, EVEX_V128;
  }
}
5768
// Combine the byte/word (opc_wb) and dword/qword (opc_dq) families so one
// defm produces all element sizes of a vptest instruction.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5773
// VPTESTM and VPTESTNM share opcodes 0x26/0x27 and differ only in their
// mandatory prefix: T8PD (66 0F38) selects vptestm, T8XS (F3 0F38) vptestnm.
defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                         SchedWriteVecLogic>, T8PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                         SchedWriteVecLogic>, T8XS;
5778
5779//===----------------------------------------------------------------------===//
5780// AVX-512  Shift instructions
5781//===----------------------------------------------------------------------===//
5782
// Shift/rotate by an 8-bit immediate: register (ri) and full-vector memory
// (mi) forms.  ImmFormR/ImmFormM carry the ModRM encoding (the reg field
// selects the operation for these opcodes, e.g. MRM2r/MRM2m for vpsrl).
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}
5800
// Shift/rotate by immediate with a broadcast memory source: $src1 is a
// scalar memory operand splatted across the vector (EVEX_B).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
     EVEX_B, Sched<[sched.Folded]>;
}
5811
// Shift by a count held in an XMM register (or loaded from a 128-bit memory
// operand).  SrcVT is the 128-bit count type; the data vector _ may be wider.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
   // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5830
// Expand a shift-by-xmm-count instruction over the three vector widths.
// The CD8 scaling differs per width (CD8VQ/CD8VH/CD8VF at 512/256/128) since
// the memory operand stays 128 bits while the vector width changes.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5848
// D/Q/W variants of shift-by-count.  The W form requires BWI; the Q form can
// opt out of EVEX->VEX compression via NotEVEX2VEXConvertibleQ (used by the
// arithmetic right shift, see VPSRA below).
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}
5861
// Expand a shift-by-immediate (plus its broadcast form) over the three
// vector widths: 512-bit under HasAVX512, 256/128-bit additionally under VLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}
5882
// Word-sized shift-by-immediate, gated on BWI.  No broadcast (_rmbi) form,
// and VEX_WIG because the W bit is ignored for these encodings.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}
5896
// D/Q shift-by-immediate pair.  The Q form gets VEX_W and may opt out of
// EVEX->VEX compression via NotEVEX2VEXConvertibleQ.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
5908
// Immediate-count shifts and rotates.  The operation is encoded in the ModRM
// reg field (MRM2/MRM6/MRM4 for srl/sll/sra, MRM0/MRM1 for ror/rol).  The
// trailing '1' on VPSRA marks its qword form as not EVEX->VEX convertible.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5928
// Shift-by-xmm-count forms.  VPSRA passes 1 so its qword form is kept EVEX
// (not converted to VEX).
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
5935
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// The narrow source is widened into a ZMM register via INSERT_SUBREG, shifted
// with the 512-bit instruction, and the low subvector is extracted back out.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 timm:$src2)), sub_xmm)>;
}
5962
5963//===-------------------------------------------------------------------===//
5964// Variable Bit Shifts
5965//===-------------------------------------------------------------------===//
5966
// Per-element variable shift: the count vector has the same type as the data
// vector (unlike avx512_shift_rrm, where the count is a single 128-bit
// value).  Also reused below for the 1-src variable permutes (VPERMW/D/Q),
// which share this operand shape.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
5984
// Broadcast-memory form of the variable shifts: $src2 is a scalar memory
// operand splatted across the vector (EVEX_B).
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
5996
// Expand a variable shift (plus broadcast form) over the three vector
// widths: 512-bit under HasAVX512, 256/128-bit additionally under VLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}
6010
// Dword and qword variants of a variable shift; the qword form sets VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                 avx512vl_i64_info>, VEX_W;
}
6018
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Both operands are widened into ZMM registers via INSERT_SUBREG, the
// 512-bit "Zrr" instruction is used, and the low subvector extracted back.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-sized variable shift, gated on BWI (plus VLX for 128/256-bit widths).
// No broadcast (_mb) form, unlike the D/Q variants.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}
6053
// Per-element variable shifts and rotates.  The word-sized shift forms use
// their own opcodes (0x10-0x12).  The lowerings below widen the 128/256-bit
// vpsravq and the word-sized variable shifts to 512 bits when VLX is absent.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6070
6071
6072// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6073let Predicates = [HasAVX512, NoVLX] in {
6074  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6075            (EXTRACT_SUBREG (v8i64
6076              (VPROLVQZrr
6077                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6078                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6079                       sub_xmm)>;
6080  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6081            (EXTRACT_SUBREG (v8i64
6082              (VPROLVQZrr
6083                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6084                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6085                       sub_ymm)>;
6086
6087  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6088            (EXTRACT_SUBREG (v16i32
6089              (VPROLVDZrr
6090                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6091                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6092                        sub_xmm)>;
6093  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6094            (EXTRACT_SUBREG (v16i32
6095              (VPROLVDZrr
6096                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6097                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6098                        sub_ymm)>;
6099
6100  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6101            (EXTRACT_SUBREG (v8i64
6102              (VPROLQZri
6103                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6104                        timm:$src2)), sub_xmm)>;
6105  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6106            (EXTRACT_SUBREG (v8i64
6107              (VPROLQZri
6108                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6109                       timm:$src2)), sub_ymm)>;
6110
6111  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6112            (EXTRACT_SUBREG (v16i32
6113              (VPROLDZri
6114                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6115                        timm:$src2)), sub_xmm)>;
6116  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6117            (EXTRACT_SUBREG (v16i32
6118              (VPROLDZri
6119                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6120                        timm:$src2)), sub_ymm)>;
6121}
6122
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror image of the VPROL patterns above, for right rotates.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        timm:$src2)), sub_ymm)>;
}
6173
6174//===-------------------------------------------------------------------===//
6175// 1-src variable permutation VPERMW/D/Q
6176//===-------------------------------------------------------------------===//
6177
// Variable permute for D/Q elements.  Reuses the avx512_var_shift helpers
// (same vec-op-vec operand shape).  Only the 512- and 256-bit forms are
// defined; there is no 128-bit (Z128) variant.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}
6188
// Immediate permute for D/Q elements.  Reuses the shift-by-immediate helpers
// (same vec-op-imm operand shape).  Only 512- and 256-bit forms, like the
// variable permute above.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
6203
// Variable permute for byte/word elements, gated on the given predicate
// (HasBWI for vpermw, HasVBMI for vpermb).  All three widths are defined;
// no broadcast form.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}
6217
// 1-src variable permutes.  The D/Q pairs share an opcode and differ only in
// VEX_W (0x36 vpermd/vpermq, 0x16 vpermps/vpermpd); the second VPERMQ/VPERMPD
// pair below adds the immediate-controlled forms (opcodes 0x00/0x01).
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6238
6239//===----------------------------------------------------------------------===//
6240// AVX-512 - VPERMIL
6241//===----------------------------------------------------------------------===//
6242
// Variable VPERMILPS/PD: the data vector _ and the control vector Ctrl have
// distinct VT infos (FP data, integer control).  Emits reg (rr), full-vector
// memory (rm), and broadcast (rmb) forms.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
6270
// Expand the variable VPERMILP over the three vector widths (512-bit under
// HasAVX512; 128/256-bit additionally under VLX), always using the
// X86VPermilpv node.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6286
// Defines both flavors of a VPERMILP instruction: the variable form
// (vector control in a register/memory operand, opcode OpcVar) and the
// immediate form (i8 control, opcode OpcImm), the latter reusing the
// shift-by-immediate templates.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
6295
// VPERMILPS/VPERMILPD: in-lane permutes of f32/f64 elements. The variable
// control vector uses the matching-width integer VT info.
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
6302
6303//===----------------------------------------------------------------------===//
6304// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6305//===----------------------------------------------------------------------===//
6306
// All three shuffles share opcode 0x70; they are distinguished by their
// prefix base classes (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base).
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
6316
6317//===----------------------------------------------------------------------===//
6318// AVX-512 - VPSHUFB
6319//===----------------------------------------------------------------------===//
6320
// Instantiates VPSHUFB at all three vector widths. Byte operations require
// BWI; the 128/256-bit forms additionally require VLX. Reuses the
// variable-shift template, whose (vector, vector control) operand pattern
// matches VPSHUFB's.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                              EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}
6334
// VEX_WIG: the VEX/EVEX W bit is ignored for this encoding.
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;
6337
6338//===----------------------------------------------------------------------===//
6339// Move Low to High and High to Low packed FP Instructions
6340//===----------------------------------------------------------------------===//
6341
// EVEX-encoded register-register MOVLHPS/MOVHLPS shuffles on v4f32.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// NOTE(review): marked NotMemoryFoldable — presumably to keep the memory-fold
// tables from pairing this with an unrelated load form; confirm against the
// fold-table generator.
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6353
6354//===----------------------------------------------------------------------===//
6355// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6357//===----------------------------------------------------------------------===//
6358
// Load form of VMOVH/LPS/PD: loads a scalar f64 from memory, wraps it in a
// v2f64 via scalar_to_vector, bitcasts to the destination VT, and combines
// it with $src1 through OpNode (null_frag callers get no pattern at all).
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                     (OpNode _.RC:$src1,
                       (_.VT (bitconvert
                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}
6373
6374// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6375// SSE1. And MOVLPS pattern is even more complex.
// The PS variants pass null_frag (assembler/disassembler only); the PD
// variants select through X86Unpckl (high half) and X86Movsd (low half).
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6384
// Extra selection patterns: fold a zero-extending 64-bit load directly into
// the VMOVHPD/VMOVLPD load forms.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
6394
// Store forms. The PS variants carry no pattern; VMOVHPD stores the high
// f64 (element 0 of an unpckh of $src with itself) and VMOVLPD stores
// element 0 directly.
let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhps\t{$src, $dst|$dst, $src}",
                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlps\t{$src, $dst|$dst, $src}",
                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (v2f64 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
6420
// Storing element 1 (obtained via VPERMILPD imm=1) is the same as storing
// the high half, so select VMOVHPD's store form.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
6428//===----------------------------------------------------------------------===//
6429// FMA - Fused Multiply Operations
6430//
6431
// Packed FMA, 213 form: reg-reg (r), reg-mem (m), and broadcast (mb)
// variants. $src1 is tied to $dst; patterns place the operands as
// (src2, src1, src3) to encode the 213 operand order. OpNode is the
// unmasked node, MaskOpNode the node used under a write-mask.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form: $src3 is a scalar memory operand splatted to the
  // full vector width (EVEX.b).
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
            (MaskOpNode _.RC:$src2,
             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6462
// Embedded-rounding (EVEX.b + $rc) variant of the 213 form. The same
// rounded OpNode is used for both the unmasked and masked patterns.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6475
// Combines the 213 rm and rounding templates across vector widths: the
// 512-bit form (with rounding) under AVX512F, the 128/256-bit forms under
// VLX. Rounding is only available on the ZMM form.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512, Suff>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6496
// Instantiates the 213 form for both element types: PS (f32) and PD (f64,
// which sets VEX_W).
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f64_info, "PD">, VEX_W;
}
6506
// 213-form packed FMA family. Each entry pairs the "any" (may-raise-FP-
// exception tolerant) node for unmasked selection with the strict node for
// masked selection, plus the rounding node.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;
6519
6520
// Packed FMA, 231 form. The register form carries no unmasked pattern
// (null_frag) — only the masked pattern is provided; memory and broadcast
// forms pattern-match with operands in (src2, src3, src1) order.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form: $src3 is a scalar memory operand splatted to the full
  // vector width (EVEX.b).
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
         "$src2, ${src3}"#_.BroadcastStr,
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (_.BroadcastLdFrag addr:$src3)),
                      _.RC:$src1)),
         (_.VT (MaskOpNode _.RC:$src2,
                           (_.VT (_.BroadcastLdFrag addr:$src3)),
                           _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6553
// Embedded-rounding variant of the 231 form. As with the register form,
// the unmasked pattern is null_frag; only the masked pattern selects.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (null_frag),
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
          1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6566
// Combines the 231 rm and rounding templates across vector widths (mirrors
// avx512_fma3p_213_common): ZMM with rounding under AVX512F, XMM/YMM under
// VLX.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512, Suff>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6587
// Instantiates the 231 form for both element types: PS (f32) and PD (f64,
// which sets VEX_W).
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f64_info, "PD">, VEX_W;
}
6597
// 231-form packed FMA family (same node pairing as the 213 family above:
// "any" node for unmasked, strict node for masked, plus rounding node).
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;
6610
// Packed FMA, 132 form. The register form carries no unmasked pattern
// (null_frag); memory and broadcast patterns are written in 312 operand
// order (see inline comments) to avoid duplicate-pattern conflicts.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
         "$src2, ${src3}"#_.BroadcastStr,
         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                       _.RC:$src1, _.RC:$src2)),
         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                           _.RC:$src1, _.RC:$src2)), 1, 0>,
         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
6645
// Embedded-rounding variant of the 132 form; unmasked pattern is null_frag,
// only the masked pattern selects.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (null_frag),
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
          1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6658
// Combines the 132 rm and rounding templates across vector widths (mirrors
// the 213/231 _common multiclasses).
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512, Suff>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6679
// Instantiates the 132 form for both element types: PS (f32) and PD (f64,
// which sets VEX_W).
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                      OpNodeRnd, SchedWriteFMA,
                                      avx512vl_f64_info, "PD">, VEX_W;
}
6689
// 132-form packed FMA family (same node pairing as the 213/231 families).
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;
6702
6703// Scalar FMA
// Scalar FMA skeleton. Produces the intrinsic (_Int) forms — which carry no
// patterns here (null_frag); they are matched by the patterns in
// avx512_scalar_fma_patterns — and the isCodeGenOnly FRC-register forms
// whose patterns (RHS_r/RHS_m/RHS_b) are supplied by the caller.
// MaskOnlyReg=1 suppresses the register and rounding patterns (used by the
// 231/132 variants, where only the memory-form pattern is wanted).
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  // Embedded-rounding intrinsic form.
  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  // Plain scalar-register (FRC) forms, used only by ISel patterns.
  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                     !strconcat(OpcodeStr,
                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                     Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
6746
// Instantiates the 213/231/132 scalar forms of one FMA operation for a
// single element type, supplying the FRC-register patterns to
// avx512_fma3s_common. Only the 213 variant gets the register and rounding
// patterns (MaskOnlyReg=0).
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                          _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                 _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}
6780
// Top-level scalar FMA multiclass: instantiates all three operand orders
// for both SS (f32) and SD (f64, VEX_W) element types.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
6792
// Scalar FMA family: opcodes are (213, 231, 132) respectively.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6797
6798multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
6799                                      SDNode RndOp, string Prefix,
6800                                      string Suffix, SDNode Move,
6801                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
6802  let Predicates = [HasAVX512] in {
6803    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6804                (Op _.FRC:$src2,
6805                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6806                    _.FRC:$src3))))),
6807              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6808               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6809               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6810
6811    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6812                (Op _.FRC:$src2, _.FRC:$src3,
6813                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6814              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6815               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6816               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6817
6818    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6819                (Op _.FRC:$src2,
6820                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6821                    (_.ScalarLdFrag addr:$src3)))))),
6822              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6823               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6824               addr:$src3)>;
6825
6826    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6827                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6828                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6829              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6830               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6831               addr:$src3)>;
6832
6833    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6834                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6835                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6836              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6837               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6838               addr:$src3)>;
6839
6840    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6841               (X86selects_mask VK1WM:$mask,
6842                (MaskedOp _.FRC:$src2,
6843                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6844                    _.FRC:$src3),
6845                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6846              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6847               VR128X:$src1, VK1WM:$mask,
6848               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6849               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6850
6851    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6852               (X86selects_mask VK1WM:$mask,
6853                (MaskedOp _.FRC:$src2,
6854                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6855                    (_.ScalarLdFrag addr:$src3)),
6856                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6857              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6858               VR128X:$src1, VK1WM:$mask,
6859               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6860
6861    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6862               (X86selects_mask VK1WM:$mask,
6863                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6865                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6866              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6867               VR128X:$src1, VK1WM:$mask,
6868               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6869
6870    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6871               (X86selects_mask VK1WM:$mask,
6872                (MaskedOp _.FRC:$src2, _.FRC:$src3,
6873                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6874                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6875              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6876               VR128X:$src1, VK1WM:$mask,
6877               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6878               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6879
6880    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6881               (X86selects_mask VK1WM:$mask,
6882                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6883                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6884                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6885              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6886               VR128X:$src1, VK1WM:$mask,
6887               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6888
6889    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6890               (X86selects_mask VK1WM:$mask,
6891                (MaskedOp _.FRC:$src2,
6892                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6893                          _.FRC:$src3),
6894                (_.EltVT ZeroFP)))))),
6895              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6896               VR128X:$src1, VK1WM:$mask,
6897               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6898               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6899
6900    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6901               (X86selects_mask VK1WM:$mask,
6902                (MaskedOp _.FRC:$src2, _.FRC:$src3,
6903                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6904                (_.EltVT ZeroFP)))))),
6905              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6906               VR128X:$src1, VK1WM:$mask,
6907               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6908               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6909
6910    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6911               (X86selects_mask VK1WM:$mask,
6912                (MaskedOp _.FRC:$src2,
6913                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6914                          (_.ScalarLdFrag addr:$src3)),
6915                (_.EltVT ZeroFP)))))),
6916              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6917               VR128X:$src1, VK1WM:$mask,
6918               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6919
6920    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6921               (X86selects_mask VK1WM:$mask,
6922                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6923                          _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6924                (_.EltVT ZeroFP)))))),
6925              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6926               VR128X:$src1, VK1WM:$mask,
6927               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6928
6929    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6930               (X86selects_mask VK1WM:$mask,
6931                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6932                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6933                (_.EltVT ZeroFP)))))),
6934              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6935               VR128X:$src1, VK1WM:$mask,
6936               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6937
6938    // Patterns with rounding mode.
6939    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6940                (RndOp _.FRC:$src2,
6941                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                       _.FRC:$src3, (i32 timm:$rc)))))),
6943              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6944               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6945               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6946
6947    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6948                (RndOp _.FRC:$src2, _.FRC:$src3,
6949                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6950                       (i32 timm:$rc)))))),
6951              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6952               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6953               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6954
6955    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6956               (X86selects_mask VK1WM:$mask,
6957                (RndOp _.FRC:$src2,
6958                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6959                       _.FRC:$src3, (i32 timm:$rc)),
6960                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6961              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6962               VR128X:$src1, VK1WM:$mask,
6963               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6964               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6965
6966    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6967               (X86selects_mask VK1WM:$mask,
6968                (RndOp _.FRC:$src2, _.FRC:$src3,
6969                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6970                       (i32 timm:$rc)),
6971                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6972              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6973               VR128X:$src1, VK1WM:$mask,
6974               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6975               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6976
6977    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6978               (X86selects_mask VK1WM:$mask,
6979                (RndOp _.FRC:$src2,
6980                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6981                       _.FRC:$src3, (i32 timm:$rc)),
6982                (_.EltVT ZeroFP)))))),
6983              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6984               VR128X:$src1, VK1WM:$mask,
6985               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6986               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6987
6988    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6989               (X86selects_mask VK1WM:$mask,
6990                (RndOp _.FRC:$src2, _.FRC:$src3,
6991                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6992                       (i32 timm:$rc)),
6993                (_.EltVT ZeroFP)))))),
6994              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6995               VR128X:$src1, VK1WM:$mask,
6996               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6997               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6998  }
6999}
7000
// Instantiate the scalar FMA patterns for all four flavors (FMADD, FMSUB,
// FNMADD, FNMSUB), once for f32 scalars in v4f32 ("SS"/X86Movss) and once
// for f64 scalars in v2f64 ("SD"/X86Movsd). Each instantiation supplies the
// strict/non-strict node (Op), the plain node used under a mask (MaskedOp),
// and the explicit-rounding node (RndOp).
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7018
7019//===----------------------------------------------------------------------===//
// AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the
// Low/High 52 bits of the Products
7021//===----------------------------------------------------------------------===//
// All IFMA forms accumulate into the destination, so tie $src1 to $dst.
let Constraints = "$src1 = $dst" in {
// Defines the three encodings of one VPMADD52[LH]UQ instruction at a single
// vector width: register-register (r), register-memory (m), and
// register-broadcast (mb), each with the usual AVX512 masked variants.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  // Register form: dst = OpNode(src2, src3, src1-accumulator).
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  // Memory form: $src3 is a full-width vector load.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form (EVEX.b): $src3 is a scalar load splat across the vector.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (_.BroadcastLdFrag addr:$src3)),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"
7051
// Instantiates one VPMADD52 instruction at all three vector widths.
// The 512-bit form only needs HasIFMA; the 128/256-bit forms additionally
// require HasVLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
7065
// VPMADD52LUQ accumulates the low 52 bits of the 52x52-bit products;
// VPMADD52HUQ accumulates the high 52 bits.
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
7072
7073//===----------------------------------------------------------------------===//
7074// AVX-512  Scalar convert from sign integer to float/double
7075//===----------------------------------------------------------------------===//
7076
// Scalar GPR -> FP conversion (vcvt{u}si2ss/sd without rounding control).
// Emits:
//  - rr/rm:          codegen-only forms writing the FP register class (FRC),
//                    used by the isel patterns below.
//  - rr_Int/rm_Int:  the "intrinsic" forms operating on the full XMM vector,
//                    merging into $src1.
// _Uses/_mayRaiseFPException default to MXCSR-using, FP-exception-raising;
// callers pass ([], 0) for conversions that are always exact (i32 -> f64).
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
                    string mem, list<Register> _Uses = [MXCSR],
                    bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                (ins DstVT.RC:$src1, SrcRC:$src2),
                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set DstVT.RC:$dst,
                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                (ins DstVT.RC:$src1, x86memop:$src2),
                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set DstVT.RC:$dst,
                      (OpNode (DstVT.VT DstVT.RC:$src1),
                               (ld_frag addr:$src2)))]>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  // AT&T-syntax alias with the explicit size suffix (e.g. "vcvtsi2ssl").
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}
7114
// Static-rounding (EVEX.b + {er}) register form of the scalar GPR -> FP
// conversion; the rounding mode is carried as an immediate operand ($rc).
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 timm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  // AT&T-syntax alias with the explicit size suffix.
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}
7133
// Combines the plain and the static-rounding forms of one scalar GPR -> FP
// conversion under a single name; VEX_LIG marks the vector length as ignored.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}
7143
let Predicates = [HasAVX512] in {
// Signed int -> float/double (opcode 0x2A). The i32 -> f64 case is always
// exact, so it uses plain avx512_vcvtsi with null_frag, no MXCSR use, and
// mayRaiseFPException = 0 (the ([], 0) arguments), and has no rounding form.
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                 WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// Suffix-less AT&T aliases default the memory forms to the 32-bit operand.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

// Select the codegen-only FRC forms for plain scalar conversions; the
// pass-through operand is unused, hence IMPLICIT_DEF.
def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned int -> float/double (opcode 0x7B); mirrors the signed cases,
// including the exact i32 -> f64 conversion.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                  WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
7223
7224//===----------------------------------------------------------------------===//
7225// AVX-512  Scalar convert from float/double to integer
7226//===----------------------------------------------------------------------===//
7227
// Scalar FP (XMM) -> GPR conversion, intrinsic forms:
//  - rr_Int:  register source, dynamic rounding (reads MXCSR via SIMD_EXC).
//  - rrb_Int: register source with static rounding control (EVEX.b + $rc).
//  - rm_Int:  memory source.
// Each form also gets a suffixed AT&T-syntax alias (e.g. "vcvtss2sil").
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [HasAVX512]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
}
7259
// Convert float/double to signed/unsigned int 32/64.
// Opcode 0x2D is the signed form (vcvt{ss,sd}2si), 0x79 the unsigned form
// (vcvt{ss,sd}2usi); VEX_W selects the 64-bit destination.
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7285
// Codegen-only scalar FP (FRC) -> GPR conversion forms with register (rr)
// and memory (rm) sources; used below to select lrint/llrint.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched,
                        string aliasStr> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    let isCodeGenOnly = 1 in {
    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  } // Predicates = [HasAVX512]
}
7303
// lrint/llrint lower directly to vcvtss2si/vcvtsd2si (round using the
// current rounding mode). The i32 patterns come from the defms themselves;
// the i64-from-f32/f64 register and load cases are added explicitly below.
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                       lrint, WriteCvtSS2I,
                       "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                       llrint, WriteCvtSS2I,
                       "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                       lrint, WriteCvtSD2I,
                       "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                       llrint, WriteCvtSD2I,
                       "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}
7324
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
// which would otherwise produce unnecessary vmovs{s,d} instructions.
// Each pattern folds (mov low element of (scalar_to_vector (cvt ...))) into
// the single _Int conversion instruction, which already merges into $dst.
let Predicates = [HasAVX512] in {
// Signed conversions.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned conversions.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
7408
7409// Convert float/double to signed/unsigned int 32/64 with truncation
//
// Emits five instruction flavors per instantiation:
//   rr/rm      - codegen-only forms on the scalar FP register class (FRC),
//                selected from OpNode (e.g. any_fp_to_sint).
//   rr_Int/rm_Int - forms on the full XMM register class, selected from
//                OpNodeInt (intrinsic-style nodes).
//   rrb_Int    - register form with {sae} (suppress-all-exceptions).
// Plus AT&T-syntax aliases that carry an explicit size suffix (aliasStr).
7410multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7411                            X86VectorVTInfo _DstRC, SDNode OpNode,
7412                            SDNode OpNodeInt, SDNode OpNodeSAE,
7413                            X86FoldableSchedWrite sched, string aliasStr>{
7414let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
  // Non-intrinsic forms: operate on _SrcRC.FRC and are isCodeGenOnly so they
  // never clash with the _Int forms in the assembler.
7415  let isCodeGenOnly = 1 in {
7416  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7417              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7418              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7419              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7420  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7421              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7422              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7423              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7424  }
7425
7426  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7427            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7428           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7429           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // SAE form: still reads MXCSR (rounding mode) but does not raise FP
  // exceptions, hence Uses=[MXCSR] without SIMD_EXC.
7430  let Uses = [MXCSR] in
7431  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7432            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7433            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7434                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7435  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7436              (ins _SrcRC.IntScalarMemOp:$src),
7437              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7438              [(set _DstRC.RC:$dst,
7439                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7440              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7441} //HasAVX512
7442
  // AT&T aliases with the explicit "l"/"q" destination-size suffix.
7443  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7444          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7445  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7446          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7447  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7448          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7449                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7450}
7451
// Truncating scalar FP -> signed int conversions (vcvttss2si/vcvttsd2si),
// 32- and 64-bit destinations. "{l}"/"{q}" select the AT&T size suffix.
7452defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7453                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7454                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7455defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7456                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7457                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7458defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7459                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7460                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7461defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7462                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7463                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7464
// Truncating scalar FP -> unsigned int conversions (vcvttss2usi/vcvttsd2usi),
// AVX-512-only opcode 0x78.
7465defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7466                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7467                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7468defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7469                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7470                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7471defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7472                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7473                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7474defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7475                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7476                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7477
7478//===----------------------------------------------------------------------===//
7479// AVX-512  Convert form float to double and back
7480//===----------------------------------------------------------------------===//
7481
// Scalar FP <-> FP conversion (e.g. vcvtsd2ss/vcvtss2sd): masked "_Int"
// forms on the XMM class plus codegen-only FRC forms with no patterns.
// All forms read MXCSR and may raise FP exceptions.
7482let Uses = [MXCSR], mayRaiseFPException = 1 in
7483multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7484                                X86VectorVTInfo _Src, SDNode OpNode,
7485                                X86FoldableSchedWrite sched> {
  // Masked intrinsic forms: $src1 supplies the pass-through upper elements.
7486  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7487                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7488                         "$src2, $src1", "$src1, $src2",
7489                         (_.VT (OpNode (_.VT _.RC:$src1),
7490                                       (_Src.VT _Src.RC:$src2)))>,
7491                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7492  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7493                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7494                         "$src2, $src1", "$src1, $src2",
7495                         (_.VT (OpNode (_.VT _.RC:$src1),
7496                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7497                         EVEX_4V, VEX_LIG,
7498                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7499
  // Pattern-less FRC forms; selection for these is done via separate
  // Pat<> records (see the any_fpextend/any_fpround patterns below).
7500  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7501    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7502               (ins _.FRC:$src1, _Src.FRC:$src2),
7503               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7504               EVEX_4V, VEX_LIG, Sched<[sched]>;
7505    let mayLoad = 1 in
7506    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7507               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7508               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7509               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7510  }
7511}
7512
7513// Scalar Conversion with SAE - suppress all exceptions
// Adds the EVEX.b register-register form with the {sae} modifier. Reads
// MXCSR but carries no SIMD_EXC (exceptions are suppressed).
7514multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7515                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7516                                    X86FoldableSchedWrite sched> {
7517  let Uses = [MXCSR] in
7518  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7519                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7520                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7521                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7522                                         (_Src.VT _Src.RC:$src2)))>,
7523                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7524}
7525
7526// Scalar Conversion with rounding control (RC)
// Adds the EVEX.b + EVEX_RC register-register form taking an explicit static
// rounding-mode immediate ($rc) instead of the MXCSR rounding mode.
7527multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7528                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7529                                   X86FoldableSchedWrite sched> {
7530  let Uses = [MXCSR] in
7531  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7532                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7533                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7534                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7535                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7536                        EVEX_4V, VEX_LIG, Sched<[sched]>,
7537                        EVEX_B, EVEX_RC;
7538}
// f64 -> f32 scalar conversion: base forms plus a rounding-control form
// (narrowing can round, so RC applies; no SAE-only variant here).
7539multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7540                                      SDNode OpNode, SDNode OpNodeRnd,
7541                                      X86FoldableSchedWrite sched,
7542                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7543  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
7544    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7545             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7546                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7547  }
7548}
7549
// f32 -> f64 scalar conversion: base forms plus an SAE form (widening is
// exact, so only exception suppression is meaningful, not rounding control).
7550multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7551                                      SDNode OpNode, SDNode OpNodeSAE,
7552                                      X86FoldableSchedWrite sched,
7553                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7554  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
7555    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7556             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7557             EVEX_CD8<32, CD8VT1>, XS;
7558  }
7559}
// Instantiate the scalar f64<->f32 conversion instructions (both share
// opcode 0x5A; the XD/XS prefix from the multiclasses distinguishes them).
7560defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7561                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7562                                         f32x_info>;
7563defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7564                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7565                                          f64x_info>;
7566
// Select scalar fpextend/fpround through the codegen-only FRC forms; the
// IMPLICIT_DEF feeds the unused merge operand ($src1).
7567def : Pat<(f64 (any_fpextend FR32X:$src)),
7568          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7569          Requires<[HasAVX512]>;
// Memory-operand fold only under OptForSize (otherwise the load is kept
// separate).
7570def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7571          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7572          Requires<[HasAVX512, OptForSize]>;
7573
7574def : Pat<(f32 (any_fpround FR64X:$src)),
7575          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7576           Requires<[HasAVX512]>;
7577
// Fold round/extend of the low vector element merged via movss/movsd into
// the "_Int" instruction forms (upper elements come from $dst).
7578def : Pat<(v4f32 (X86Movss
7579                   (v4f32 VR128X:$dst),
7580                   (v4f32 (scalar_to_vector
7581                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7582          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7583          Requires<[HasAVX512]>;
7584
7585def : Pat<(v2f64 (X86Movsd
7586                   (v2f64 VR128X:$dst),
7587                   (v2f64 (scalar_to_vector
7588                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7589          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7590          Requires<[HasAVX512]>;
7591
7592//===----------------------------------------------------------------------===//
7593// AVX-512  Vector convert from signed/unsigned integer to float/double
7594//          and from float/double to signed/unsigned integer
7595//===----------------------------------------------------------------------===//
7596
// Generic vector conversion: rr, rm and broadcast (rmb) forms, each with
// unmasked / merge-masked / zero-masked variants via AVX512_maskable_cvt.
// OpNode is used for the unmasked pattern, MaskOpNode for the masked ones
// (lets strict-FP "any_" nodes pair with their non-strict counterparts).
// LdDAG/MaskLdDAG are overridable so callers can substitute a custom load
// pattern (see avx512_vcvt_fpextend and avx512_cvtdq2pd).
7597multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7598                          X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode,
7599                          X86FoldableSchedWrite sched,
7600                          string Broadcast = _.BroadcastStr,
7601                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7602                          RegisterClass MaskRC = _.KRCWM,
7603                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7604                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7605let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register-register form.
7606  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7607                         (ins _Src.RC:$src),
7608                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7609                         (ins MaskRC:$mask, _Src.RC:$src),
7610                          OpcodeStr, "$src", "$src",
7611                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7612                         (vselect_mask MaskRC:$mask,
7613                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7614                                       _.RC:$src0),
7615                         (vselect_mask MaskRC:$mask,
7616                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7617                                       _.ImmAllZerosV)>,
7618                         EVEX, Sched<[sched]>;
7619
  // Full-width memory form; Alias appends an optional "{x}"/"{y}" suffix.
7620  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7621                         (ins MemOp:$src),
7622                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7623                         (ins MaskRC:$mask, MemOp:$src),
7624                         OpcodeStr#Alias, "$src", "$src",
7625                         LdDAG,
7626                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7627                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7628                         EVEX, Sched<[sched.Folded]>;
7629
  // Broadcast-from-scalar memory form (EVEX.b, "{1toN}" syntax).
7630  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7631                         (ins _Src.ScalarMemOp:$src),
7632                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7633                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7634                         OpcodeStr,
7635                         "${src}"#Broadcast, "${src}"#Broadcast,
7636                         (_.VT (OpNode (_Src.VT
7637                                  (_Src.BroadcastLdFrag addr:$src))
7638                            )),
7639                         (vselect_mask MaskRC:$mask,
7640                                       (_.VT
7641                                        (MaskOpNode
7642                                         (_Src.VT
7643                                          (_Src.BroadcastLdFrag addr:$src)))),
7644                                       _.RC:$src0),
7645                         (vselect_mask MaskRC:$mask,
7646                                       (_.VT
7647                                        (MaskOpNode
7648                                         (_Src.VT
7649                                          (_Src.BroadcastLdFrag addr:$src)))),
7650                                       _.ImmAllZerosV)>,
7651                         EVEX, EVEX_B, Sched<[sched.Folded]>;
7652  }
7653}
7654// Conversion with SAE - suppress all exceptions
// Vector (512-bit) counterpart of avx512_cvt_fp_sae_scalar: EVEX.b
// register form with {sae}; reads MXCSR, raises no FP exceptions.
7655multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7656                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
7657                              X86FoldableSchedWrite sched> {
7658  let Uses = [MXCSR] in
7659  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7660                        (ins _Src.RC:$src), OpcodeStr,
7661                        "{sae}, $src", "$src, {sae}",
7662                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7663                        EVEX, EVEX_B, Sched<[sched]>;
7664}
7665
7666// Conversion with rounding control (RC)
// Vector register form with an explicit static rounding-mode immediate
// ($rc) encoded via EVEX.b + EVEX_RC.
7667multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7668                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
7669                         X86FoldableSchedWrite sched> {
7670  let Uses = [MXCSR] in
7671  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7672                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7673                        "$rc, $src", "$src, $rc",
7674                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7675                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7676}
7677
7678// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Overrides LdDAG/MaskLdDAG with an "extload<src-VT>" PatFrag looked up by
// name, so the load+extend is matched as a single extending load.
7679multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7680                                X86VectorVTInfo _Src, SDNode OpNode,
7681                                SDNode MaskOpNode,
7682                                X86FoldableSchedWrite sched,
7683                                string Broadcast = _.BroadcastStr,
7684                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7685                                RegisterClass MaskRC = _.KRCWM>
7686  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7687                   Alias, MemOp, MaskRC,
7688                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7689                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7690
7691// Extend Float to Double
// 512-bit form under HasAVX512 (plus an SAE variant); 128/256-bit forms
// under HasVLX. The Z128 form converts only 2 of the 4 f32 source
// elements, hence the X86(any_)vfpext nodes and the f64mem/"{1to2}"
// operands instead of the generic fpextend.
7692multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7693                           X86SchedWriteWidths sched> {
7694  let Predicates = [HasAVX512] in {
7695    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7696                            any_fpextend, fpextend, sched.ZMM>,
7697             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7698                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7699  }
7700  let Predicates = [HasVLX] in {
7701    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7702                               X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
7703                               "", f64mem>, EVEX_V128;
7704    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
7705                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7706  }
7707}
7708
7709// Truncate Double to Float
// 512-bit form (with RC variant) under HasAVX512; 128/256-bit forms under
// HasVLX. Z128 uses null_frag: its selection is done by explicit Pat<>
// records below so X86vmfpround masking can be matched. The "x"/"y"
// aliases disambiguate the 128/256-bit memory forms, which share the same
// destination register class (VR128X).
7710multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7711  let Predicates = [HasAVX512] in {
7712    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
7713                            X86any_vfpround, X86vfpround, sched.ZMM>,
7714             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7715                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
7716  }
7717  let Predicates = [HasVLX] in {
7718    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7719                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
7720                               f128mem, VK2WM>, EVEX_V128;
7721    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
7722                               X86any_vfpround, X86vfpround,
7723                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7724  }
7725
  // AT&T aliases for the 128-bit ("x"-suffixed) forms, including masked,
  // zero-masked and broadcast variants.
7726  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7727                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7728  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7729                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7730                  VK2WM:$mask, VR128X:$src), 0, "att">;
7731  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7732                  "$dst {${mask}} {z}, $src}",
7733                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7734                  VK2WM:$mask, VR128X:$src), 0, "att">;
7735  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7736                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7737  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7738                  "$dst {${mask}}, ${src}{1to2}}",
7739                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7740                  VK2WM:$mask, f64mem:$src), 0, "att">;
7741  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7742                  "$dst {${mask}} {z}, ${src}{1to2}}",
7743                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7744                  VK2WM:$mask, f64mem:$src), 0, "att">;
7745
  // AT&T aliases for the 256-bit ("y"-suffixed) forms.
7746  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7747                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7748  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7749                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7750                  VK4WM:$mask, VR256X:$src), 0, "att">;
7751  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7752                  "$dst {${mask}} {z}, $src}",
7753                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7754                  VK4WM:$mask, VR256X:$src), 0, "att">;
7755  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7756                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7757  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7758                  "$dst {${mask}}, ${src}{1to4}}",
7759                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7760                  VK4WM:$mask, f64mem:$src), 0, "att">;
7761  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7762                  "$dst {${mask}} {z}, ${src}{1to4}}",
7763                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7764                  VK4WM:$mask, f64mem:$src), 0, "att">;
7765}
7766
// Instantiate the packed f64<->f32 conversions; both use opcode 0x5A and
// are distinguished by operand size (VEX_W/PD vs. PS).
7767defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7768                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
7769defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7770                                  PS, EVEX_CD8<32, CD8VH>;
7771
7772let Predicates = [HasVLX] in {
7773  // Special patterns to allow use of X86vmfpround for masking. Instruction
7774  // patterns have been disabled with null_frag.
  // Register source: unmasked, merge-masked and zero-masked.
7775  def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
7776            (VCVTPD2PSZ128rr VR128X:$src)>;
7777  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7778                          VK2WM:$mask),
7779            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7780  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7781                          VK2WM:$mask),
7782            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7783
  // Full-width memory source.
7784  def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
7785            (VCVTPD2PSZ128rm addr:$src)>;
7786  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7787                          VK2WM:$mask),
7788            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7789  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7790                          VK2WM:$mask),
7791            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7792
  // Broadcast (single f64 splat to both lanes) memory source.
7793  def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7794            (VCVTPD2PSZ128rmb addr:$src)>;
7795  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7796                          (v4f32 VR128X:$src0), VK2WM:$mask),
7797            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7798  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7799                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7800            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7801}
7802
7803// Convert Signed/Unsigned Doubleword to Double
// int->double is exact, so these forms read no MXCSR state and raise no
// FP exceptions (hence the explicit empty Uses list overriding the
// defaults set inside avx512_vcvt_fp).
7804let Uses = []<Register>, mayRaiseFPException = 0 in
7805multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7806                           SDNode MaskOpNode, SDNode OpNode128,
7807                           SDNode MaskOpNode128,
7808                           X86SchedWriteWidths sched> {
7809  // No rounding in this op
7810  let Predicates = [HasAVX512] in
7811    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7812                            MaskOpNode, sched.ZMM>, EVEX_V512;
7813
7814  let Predicates = [HasVLX] in {
    // Z128 reads only 64 bits (2 x i32) of memory; the custom LdDAGs load
    // an i64 scalar and bitcast it to v4i32 for OpNode128.
7815    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7816                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
7817                               "", i64mem, VK2WM,
7818                               (v2f64 (OpNode128 (bc_v4i32
7819                                (v2i64
7820                                 (scalar_to_vector (loadi64 addr:$src)))))),
7821                               (v2f64 (MaskOpNode128 (bc_v4i32
7822                                (v2i64
7823                                 (scalar_to_vector (loadi64 addr:$src))))))>,
7824                               EVEX_V128;
7825    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7826                               MaskOpNode, sched.YMM>, EVEX_V256;
7827  }
7828}
7829
7830// Convert Signed/Unsigned Doubleword to Float
// int->float can round, so the 512-bit form also gets a rounding-control
// variant; 128/256-bit forms are HasVLX-only.
7831multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7832                           SDNode MaskOpNode, SDNode OpNodeRnd,
7833                           X86SchedWriteWidths sched> {
7834  let Predicates = [HasAVX512] in
7835    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7836                            MaskOpNode, sched.ZMM>,
7837             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7838                               OpNodeRnd, sched.ZMM>, EVEX_V512;
7839
7840  let Predicates = [HasVLX] in {
7841    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7842                               MaskOpNode, sched.XMM>, EVEX_V128;
7843    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7844                               MaskOpNode, sched.YMM>, EVEX_V256;
7845  }
7846}
7847
7848// Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversions take an SAE variant (not RC - truncation fixes
// the rounding direction).
7849multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7850                            SDNode MaskOpNode,
7851                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7852  let Predicates = [HasAVX512] in {
7853    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7854                            MaskOpNode, sched.ZMM>,
7855             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7856                                OpNodeSAE, sched.ZMM>, EVEX_V512;
7857  }
7858  let Predicates = [HasVLX] in {
7859    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7860                               MaskOpNode, sched.XMM>, EVEX_V128;
7861    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7862                               MaskOpNode, sched.YMM>, EVEX_V256;
7863  }
7864}
7865
7866// Convert Float to Signed/Unsigned Doubleword
// Non-truncating variant: honors the current rounding mode, so the
// 512-bit form gets a rounding-control variant instead of SAE.
7867multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7868                           SDNode MaskOpNode, SDNode OpNodeRnd,
7869                           X86SchedWriteWidths sched> {
7870  let Predicates = [HasAVX512] in {
7871    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7872                            MaskOpNode, sched.ZMM>,
7873             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7874                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7875  }
7876  let Predicates = [HasVLX] in {
7877    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7878                               MaskOpNode, sched.XMM>, EVEX_V128;
7879    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7880                               MaskOpNode, sched.YMM>, EVEX_V256;
7881  }
7882}
7883
7884// Convert Double to Signed/Unsigned Doubleword with truncation
// 512-bit form with SAE variant; VLX 128/256-bit forms. Z128 uses
// null_frag (selection handled by external patterns) and the "x"/"y"
// aliases disambiguate the memory forms, which share VR128X destinations.
7885multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7886                            SDNode MaskOpNode, SDNode OpNodeSAE,
7887                            X86SchedWriteWidths sched> {
7888  let Predicates = [HasAVX512] in {
7889    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7890                            MaskOpNode, sched.ZMM>,
7891             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7892                                OpNodeSAE, sched.ZMM>, EVEX_V512;
7893  }
7894  let Predicates = [HasVLX] in {
7895    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7896    // memory forms of these instructions in Asm Parser. They have the same
7897    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7898    // due to the same reason.
7899    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7900                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7901                               VK2WM>, EVEX_V128;
7902    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7903                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7904  }
7905
  // AT&T aliases for the 128-bit ("x") forms: plain, masked, zero-masked
  // and the {1to2} broadcast variants.
7906  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7907                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7908                  VR128X:$src), 0, "att">;
7909  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7910                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7911                  VK2WM:$mask, VR128X:$src), 0, "att">;
7912  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7913                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7914                  VK2WM:$mask, VR128X:$src), 0, "att">;
7915  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7916                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7917                  f64mem:$src), 0, "att">;
7918  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7919                  "$dst {${mask}}, ${src}{1to2}}",
7920                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7921                  VK2WM:$mask, f64mem:$src), 0, "att">;
7922  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7923                  "$dst {${mask}} {z}, ${src}{1to2}}",
7924                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7925                  VK2WM:$mask, f64mem:$src), 0, "att">;
7926
  // AT&T aliases for the 256-bit ("y") forms.
7927  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7928                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7929                  VR256X:$src), 0, "att">;
7930  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7931                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7932                  VK4WM:$mask, VR256X:$src), 0, "att">;
7933  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7934                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7935                  VK4WM:$mask, VR256X:$src), 0, "att">;
7936  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7937                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7938                  f64mem:$src), 0, "att">;
7939  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7940                  "$dst {${mask}}, ${src}{1to4}}",
7941                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7942                  VK4WM:$mask, f64mem:$src), 0, "att">;
7943  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7944                  "$dst {${mask}} {z}, ${src}{1to4}}",
7945                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7946                  VK4WM:$mask, f64mem:$src), 0, "att">;
7947}
7948
7949// Convert Double to Signed/Unsigned Doubleword
7950multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7951                           SDNode MaskOpNode, SDNode OpNodeRnd,
7952                           X86SchedWriteWidths sched> {
7953  let Predicates = [HasAVX512] in {
7954    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7955                            MaskOpNode, sched.ZMM>,
7956             // The 512-bit form additionally gets a rounding-control
7957             // (embedded-rounding) variant.
7956             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7957                               OpNodeRnd, sched.ZMM>, EVEX_V512;
7958  }
7959  let Predicates = [HasVLX] in {
7960    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7961    // memory forms of these instructions in Asm Parser. They have the same
7962    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7963    // due to the same reason.
7964    // The 128-bit form uses null_frag here; selection is done by explicit
7965    // patterns elsewhere (see the HasVLX Pat block for VCVTPD2DQZ128*).
7964    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7965                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7966                               VK2WM>, EVEX_V128;
7967    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7968                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7969  }
7970
7971  // AT&T-syntax aliases that accept the explicit "x" (128-bit) and "y"
7972  // (256-bit) mnemonic suffixes for the plain, masked, zero-masked and
7973  // broadcast forms.
7971  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7972                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7973  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7974                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7975                  VK2WM:$mask, VR128X:$src), 0, "att">;
7976  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7977                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7978                  VK2WM:$mask, VR128X:$src), 0, "att">;
7979  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7980                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7981                  f64mem:$src), 0, "att">;
7982  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7983                  "$dst {${mask}}, ${src}{1to2}}",
7984                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7985                  VK2WM:$mask, f64mem:$src), 0, "att">;
7986  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7987                  "$dst {${mask}} {z}, ${src}{1to2}}",
7988                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7989                  VK2WM:$mask, f64mem:$src), 0, "att">;
7990
7991  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7992                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7993  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7994                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7995                  VK4WM:$mask, VR256X:$src), 0, "att">;
7996  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7997                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7998                  VK4WM:$mask, VR256X:$src), 0, "att">;
7999  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8000                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8001                  f64mem:$src), 0, "att">;
8002  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8003                  "$dst {${mask}}, ${src}{1to4}}",
8004                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8005                  VK4WM:$mask, f64mem:$src), 0, "att">;
8006  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8007                  "$dst {${mask}} {z}, ${src}{1to4}}",
8008                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8009                  VK4WM:$mask, f64mem:$src), 0, "att">;
8010}
8011
8012// Convert Double to Signed/Unsigned Quadword
8013multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8014                           SDNode MaskOpNode, SDNode OpNodeRnd,
8015                           X86SchedWriteWidths sched> {
8016  let Predicates = [HasDQI] in {
8017    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8018                            MaskOpNode, sched.ZMM>,
8019             // ZMM form also gets an embedded-rounding variant.
8019             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8020                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8021  }
8022  // 128/256-bit forms require both AVX512DQ and AVX512VL.
8022  let Predicates = [HasDQI, HasVLX] in {
8023    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8024                               MaskOpNode, sched.XMM>, EVEX_V128;
8025    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8026                               MaskOpNode, sched.YMM>, EVEX_V256;
8027  }
8028}
8029
8030// Convert Double to Signed/Unsigned Quadword with truncation
8031multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8032                            SDNode MaskOpNode, SDNode OpNodeRnd,
8033                            X86SchedWriteWidths sched> {
8034  let Predicates = [HasDQI] in {
8035    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8036                            MaskOpNode, sched.ZMM>,
8037             // Truncating converts take SAE (suppress-all-exceptions),
8038             // not a rounding mode, hence _sae rather than _rc.
8037             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8038                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8039  }
8040  // 128/256-bit forms require both AVX512DQ and AVX512VL.
8040  let Predicates = [HasDQI, HasVLX] in {
8041    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8042                               MaskOpNode, sched.XMM>, EVEX_V128;
8043    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8044                               MaskOpNode, sched.YMM>, EVEX_V256;
8045  }
8046}
8047
8048// Convert Signed/Unsigned Quadword to Double
8049multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8050                           SDNode MaskOpNode, SDNode OpNodeRnd,
8051                           X86SchedWriteWidths sched> {
8052  let Predicates = [HasDQI] in {
8053    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8054                            MaskOpNode, sched.ZMM>,
8055             // ZMM form also gets an embedded-rounding variant.
8055             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8056                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8057  }
8058  // 128/256-bit forms require both AVX512DQ and AVX512VL; they have no
8059  // VEX-encoded equivalent, so EVEX->VEX compression is disabled.
8058  let Predicates = [HasDQI, HasVLX] in {
8059    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8060                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8061    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8062                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8063  }
8064}
8065
8066// Convert Float to Signed/Unsigned Quadword
8067multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8068                           SDNode MaskOpNode, SDNode OpNodeRnd,
8069                           X86SchedWriteWidths sched> {
8070  let Predicates = [HasDQI] in {
8071    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8072                            MaskOpNode, sched.ZMM>,
8073             // ZMM form also gets an embedded-rounding variant.
8073             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8074                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8075  }
8076  let Predicates = [HasDQI, HasVLX] in {
8077    // Explicitly specified broadcast string, since we take only 2 elements
8078    // from v4f32x_info source
8079    // The explicit load patterns model the 64-bit (two f32 element) memory
8080    // operand as a scalar f64 load bitcast back to v4f32.
8079    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8080                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8081                               (v2i64 (OpNode (bc_v4f32
8082                                (v2f64
8083                                 (scalar_to_vector (loadf64 addr:$src)))))),
8084                               (v2i64 (MaskOpNode (bc_v4f32
8085                                (v2f64
8086                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8087                               EVEX_V128;
8088    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8089                               MaskOpNode, sched.YMM>, EVEX_V256;
8090  }
8091}
8092
8093// Convert Float to Signed/Unsigned Quadword with truncation
8094multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8095                            SDNode MaskOpNode, SDNode OpNodeRnd,
8096                            X86SchedWriteWidths sched> {
8097  let Predicates = [HasDQI] in {
8098    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8099                            MaskOpNode, sched.ZMM>,
8100             // Truncating converts take SAE, not a rounding mode.
8100             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8101                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8102  }
8103  let Predicates = [HasDQI, HasVLX] in {
8104    // Explicitly specified broadcast string, since we take only 2 elements
8105    // from v4f32x_info source
8106    // The explicit load patterns model the 64-bit (two f32 element) memory
8107    // operand as a scalar f64 load bitcast back to v4f32.
8106    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8107                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8108                               (v2i64 (OpNode (bc_v4f32
8109                                (v2f64
8110                                 (scalar_to_vector (loadf64 addr:$src)))))),
8111                               (v2i64 (MaskOpNode (bc_v4f32
8112                                (v2f64
8113                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8114                               EVEX_V128;
8115    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8116                               MaskOpNode, sched.YMM>, EVEX_V256;
8117  }
8118}
8119
8120// Convert Signed/Unsigned Quadword to Float
8121multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8122                           SDNode MaskOpNode, SDNode OpNodeRnd,
8123                           X86SchedWriteWidths sched> {
8124  let Predicates = [HasDQI] in {
8125    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8126                            MaskOpNode, sched.ZMM>,
8127             // ZMM form also gets an embedded-rounding variant.
8127             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8128                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8129  }
8130  let Predicates = [HasDQI, HasVLX] in {
8131    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8132    // memory forms of these instructions in Asm Parser. They have the same
8133    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8134    // due to the same reason.
8135    // The 128-bit form uses null_frag here; selection is done by explicit
8136    // patterns elsewhere (see the HasDQI+HasVLX Pat block for VCVTQQ2PSZ128*).
8135    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8136                               null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8137                               EVEX_V128, NotEVEX2VEXConvertible;
8138    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8139                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8140                               NotEVEX2VEXConvertible;
8141  }
8142
8143  // AT&T-syntax aliases that accept the explicit "x" (128-bit) and "y"
8144  // (256-bit) mnemonic suffixes for the plain, masked, zero-masked and
8145  // broadcast forms.
8143  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8144                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8145                  VR128X:$src), 0, "att">;
8146  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8147                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8148                  VK2WM:$mask, VR128X:$src), 0, "att">;
8149  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8150                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8151                  VK2WM:$mask, VR128X:$src), 0, "att">;
8152  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8153                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8154                  i64mem:$src), 0, "att">;
8155  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8156                  "$dst {${mask}}, ${src}{1to2}}",
8157                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8158                  VK2WM:$mask, i64mem:$src), 0, "att">;
8159  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8160                  "$dst {${mask}} {z}, ${src}{1to2}}",
8161                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8162                  VK2WM:$mask, i64mem:$src), 0, "att">;
8163
8164  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8165                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8166                  VR256X:$src), 0, "att">;
8167  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8168                  "$dst {${mask}}, $src}",
8169                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8170                  VK4WM:$mask, VR256X:$src), 0, "att">;
8171  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8172                  "$dst {${mask}} {z}, $src}",
8173                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8174                  VK4WM:$mask, VR256X:$src), 0, "att">;
8175  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8176                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8177                  i64mem:$src), 0, "att">;
8178  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8179                  "$dst {${mask}}, ${src}{1to4}}",
8180                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8181                  VK4WM:$mask, i64mem:$src), 0, "att">;
8182  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8183                  "$dst {${mask}} {z}, ${src}{1to4}}",
8184                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8185                  VK4WM:$mask, i64mem:$src), 0, "att">;
8186}
8187
// Instantiations of the conversion multiclasses above. Each defm fixes the
// opcode, mnemonic, select DAG nodes (plain / masked / rounding-or-SAE) and
// scheduling class, plus prefix and EVEX compressed-displacement encoding.
8188defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8189                                 X86any_VSintToFP, X86VSintToFP,
8190                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8191
8192defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8193                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8194                                PS, EVEX_CD8<32, CD8VF>;
8195
8196defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8197                                 X86cvttp2si, X86cvttp2siSAE,
8198                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8199
8200defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8201                                 X86cvttp2si, X86cvttp2siSAE,
8202                                 SchedWriteCvtPD2DQ>,
8203                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8204
8205defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8206                                 X86cvttp2ui, X86cvttp2uiSAE,
8207                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8208
8209defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8210                                 X86cvttp2ui, X86cvttp2uiSAE,
8211                                 SchedWriteCvtPD2DQ>,
8212                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8213
8214defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8215                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8216                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8217
8218defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8219                                 uint_to_fp, X86VUintToFpRnd,
8220                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8221
8222defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8223                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8224                                 EVEX_CD8<32, CD8VF>;
8225
8226defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8227                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8228                                 VEX_W, EVEX_CD8<64, CD8VF>;
8229
8230defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8231                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8232                                 PS, EVEX_CD8<32, CD8VF>;
8233
8234defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8235                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8236                                 PS, EVEX_CD8<64, CD8VF>;
8237
8238defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8239                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8240                                 PD, EVEX_CD8<64, CD8VF>;
8241
8242defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8243                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8244                                 EVEX_CD8<32, CD8VH>;
8245
8246defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8247                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8248                                 PD, EVEX_CD8<64, CD8VF>;
8249
8250defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8251                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8252                                 EVEX_CD8<32, CD8VH>;
8253
8254defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8255                                 X86cvttp2si, X86cvttp2siSAE,
8256                                 SchedWriteCvtPD2DQ>, VEX_W,
8257                                 PD, EVEX_CD8<64, CD8VF>;
8258
8259defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8260                                 X86cvttp2si, X86cvttp2siSAE,
8261                                 SchedWriteCvtPS2DQ>, PD,
8262                                 EVEX_CD8<32, CD8VH>;
8263
8264defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8265                                 X86cvttp2ui, X86cvttp2uiSAE,
8266                                 SchedWriteCvtPD2DQ>, VEX_W,
8267                                 PD, EVEX_CD8<64, CD8VF>;
8268
8269defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8270                                 X86cvttp2ui, X86cvttp2uiSAE,
8271                                 SchedWriteCvtPS2DQ>, PD,
8272                                 EVEX_CD8<32, CD8VH>;
8273
8274defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8275                            sint_to_fp, X86VSintToFpRnd,
8276                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8277
8278defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8279                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8280                            VEX_W, XS, EVEX_CD8<64, CD8VF>;
8281
8282defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8283                            sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8284                            VEX_W, PS, EVEX_CD8<64, CD8VF>;
8285
8286defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8287                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
8288                            VEX_W, XD, EVEX_CD8<64, CD8VF>;
8289
// Selection patterns for the 128-bit pd->dq conversions whose multiclass
// instruction patterns were disabled with null_frag above. Each group covers
// register, load, and 64-bit broadcast sources, in unmasked, merge-masked
// (rrk/rmk/rmbk) and zero-masked (rrkz/rmkz/rmbkz) forms.
8290let Predicates = [HasVLX] in {
8291  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8292  // patterns have been disabled with null_frag.
8293  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8294            (VCVTPD2DQZ128rr VR128X:$src)>;
8295  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8296                          VK2WM:$mask),
8297            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8298  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8299                          VK2WM:$mask),
8300            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8301
8302  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8303            (VCVTPD2DQZ128rm addr:$src)>;
8304  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8305                          VK2WM:$mask),
8306            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8307  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8308                          VK2WM:$mask),
8309            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8310
8311  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8312            (VCVTPD2DQZ128rmb addr:$src)>;
8313  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8314                          (v4i32 VR128X:$src0), VK2WM:$mask),
8315            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8316  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8317                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8318            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8319
8320  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8321  // patterns have been disabled with null_frag.
8322  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8323            (VCVTTPD2DQZ128rr VR128X:$src)>;
8324  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8325                          VK2WM:$mask),
8326            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8327  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8328                          VK2WM:$mask),
8329            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8330
8331  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8332            (VCVTTPD2DQZ128rm addr:$src)>;
8333  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8334                          VK2WM:$mask),
8335            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8336  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8337                          VK2WM:$mask),
8338            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8339
8340  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8341            (VCVTTPD2DQZ128rmb addr:$src)>;
8342  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8343                          (v4i32 VR128X:$src0), VK2WM:$mask),
8344            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8345  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8346                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8347            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8348
8349  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8350  // patterns have been disabled with null_frag.
8351  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8352            (VCVTPD2UDQZ128rr VR128X:$src)>;
8353  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8354                           VK2WM:$mask),
8355            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8356  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8357                           VK2WM:$mask),
8358            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8359
8360  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8361            (VCVTPD2UDQZ128rm addr:$src)>;
8362  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8363                           VK2WM:$mask),
8364            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8365  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8366                           VK2WM:$mask),
8367            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8368
8369  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8370            (VCVTPD2UDQZ128rmb addr:$src)>;
8371  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8372                           (v4i32 VR128X:$src0), VK2WM:$mask),
8373            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8374  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8375                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8376            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8377
8378  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8379  // patterns have been disabled with null_frag.
8380  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8381            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8382  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8383                          VK2WM:$mask),
8384            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8385  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8386                          VK2WM:$mask),
8387            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8388
8389  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8390            (VCVTTPD2UDQZ128rm addr:$src)>;
8391  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8392                          VK2WM:$mask),
8393            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8394  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8395                          VK2WM:$mask),
8396            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8397
8398  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8399            (VCVTTPD2UDQZ128rmb addr:$src)>;
8400  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8401                          (v4i32 VR128X:$src0), VK2WM:$mask),
8402            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8403  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8404                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8405            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8406}
8407
// Fold a zero-extending 64-bit vector load (two f32 elements) into the
// 128-bit ps->qq conversions, for unmasked, merge-masked and zero-masked
// forms of the signed/unsigned, rounding and truncating variants.
8408let Predicates = [HasDQI, HasVLX] in {
8409  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8410            (VCVTPS2QQZ128rm addr:$src)>;
8411  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8412                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8413                                 VR128X:$src0)),
8414            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8415  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8416                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8417                                 v2i64x_info.ImmAllZerosV)),
8418            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8419
8420  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8421            (VCVTPS2UQQZ128rm addr:$src)>;
8422  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8423                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8424                                 VR128X:$src0)),
8425            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8426  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8427                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8428                                 v2i64x_info.ImmAllZerosV)),
8429            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8430
8431  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8432            (VCVTTPS2QQZ128rm addr:$src)>;
8433  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8434                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8435                                 VR128X:$src0)),
8436            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8437  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8438                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8439                                 v2i64x_info.ImmAllZerosV)),
8440            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8441
8442  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8443            (VCVTTPS2UQQZ128rm addr:$src)>;
8444  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8445                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8446                                 VR128X:$src0)),
8447            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8448  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8449                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8450                                 v2i64x_info.ImmAllZerosV)),
8451            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8452}
8453
// Fold a zero-extending 64-bit vector load (two i32 elements) into the
// 128-bit dq->pd conversions, for unmasked, merge-masked and zero-masked
// forms of the signed and unsigned variants.
8454let Predicates = [HasVLX] in {
8455  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8456            (VCVTDQ2PDZ128rm addr:$src)>;
8457  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8458                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8459                                 VR128X:$src0)),
8460            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8461  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8462                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8463                                 v2f64x_info.ImmAllZerosV)),
8464            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8465
8466  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8467            (VCVTUDQ2PDZ128rm addr:$src)>;
8468  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8469                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8470                                 VR128X:$src0)),
8471            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8472  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8473                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8474                                 v2f64x_info.ImmAllZerosV)),
8475            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8476}
8477
// 128-bit vcvt(u)qq2ps patterns. The v2i64 source only produces 2 results,
// so masking cannot be expressed with the generic vselect_mask form; the
// X86VM{S,U}intToFP nodes carry the mask/passthru explicitly instead.
// For each of signed and unsigned there are three operand groups:
// register, folded load, and broadcast load; each with unmasked, merge-
// masked (k) and zero-masked (kz) variants.
8478let Predicates = [HasDQI, HasVLX] in {
8479  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8480  // patterns have been disabled with null_frag.
8481  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8482            (VCVTQQ2PSZ128rr VR128X:$src)>;
8483  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8484                           VK2WM:$mask),
8485            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8486  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8487                           VK2WM:$mask),
8488            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8489
      // Signed, load-folded forms.
8490  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8491            (VCVTQQ2PSZ128rm addr:$src)>;
8492  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8493                           VK2WM:$mask),
8494            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8495  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8496                           VK2WM:$mask),
8497            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8498
      // Signed, broadcast-load forms.
8499  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8500            (VCVTQQ2PSZ128rmb addr:$src)>;
8501  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8502                           (v4f32 VR128X:$src0), VK2WM:$mask),
8503            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8504  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8505                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8506            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8507
8508  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8509  // patterns have been disabled with null_frag.
8510  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8511            (VCVTUQQ2PSZ128rr VR128X:$src)>;
8512  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8513                           VK2WM:$mask),
8514            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8515  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8516                           VK2WM:$mask),
8517            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8518
      // Unsigned, load-folded forms.
8519  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8520            (VCVTUQQ2PSZ128rm addr:$src)>;
8521  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8522                           VK2WM:$mask),
8523            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8524  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8525                           VK2WM:$mask),
8526            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8527
      // Unsigned, broadcast-load forms.
8528  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8529            (VCVTUQQ2PSZ128rmb addr:$src)>;
8530  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8531                           (v4f32 VR128X:$src0), VK2WM:$mask),
8532            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8533  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8534                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8535            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8536}
8537
8538//===----------------------------------------------------------------------===//
8539// Half precision conversion instructions
8540//===----------------------------------------------------------------------===//
8541
// Half->single conversion (vcvtph2ps): register and folded-load forms.
// Both forms pass a pair of patterns to AVX512_maskable_split: the
// strict-fp-capable X86any_cvtph2ps node and the plain X86cvtph2ps node.
// ld_dag lets the 128-bit instantiation supply a 64-bit zero-extending
// load while larger widths use a full-width load.
8542let Uses = [MXCSR], mayRaiseFPException = 1 in
8543multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8544                           X86MemOperand x86memop, dag ld_dag,
8545                           X86FoldableSchedWrite sched> {
8546  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8547                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8548                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8549                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8550                            T8PD, Sched<[sched]>;
8551  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8552                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8553                            (X86any_cvtph2ps (_src.VT ld_dag)),
8554                            (X86cvtph2ps (_src.VT ld_dag))>,
8555                            T8PD, Sched<[sched.Folded]>;
8556}
8557
// SAE (suppress-all-exceptions) register-only variant of vcvtph2ps.
// Reads MXCSR but does not set mayRaiseFPException: with {sae} no FP
// exception is raised.
8558multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8559                               X86FoldableSchedWrite sched> {
8560  let Uses = [MXCSR] in
8561  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8562                             (ins _src.RC:$src), "vcvtph2ps",
8563                             "{sae}, $src", "$src, {sae}",
8564                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8565                             T8PD, EVEX_B, Sched<[sched]>;
8566}
8567
// 512-bit vcvtph2ps (v16i16 -> v16f32) with its SAE variant.
8568let Predicates = [HasAVX512] in
8569  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8570                                    (load addr:$src), WriteCvtPH2PSZ>,
8571                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8572                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8573
// 256/128-bit vcvtph2ps under AVX512VL. The 128-bit memory form only
// needs the low 64 bits (4 x f16), hence the 64-bit zero-extending load.
8574let Predicates = [HasVLX] in {
8575  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8576                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8577                       EVEX_CD8<32, CD8VH>;
8578  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8579                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
8580                       WriteCvtPH2PS>, EVEX, EVEX_V128,
8581                       EVEX_CD8<32, CD8VH>;
8582
8583  // Pattern match vcvtph2ps of a scalar i64 load.
8584  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8585              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8586            (VCVTPH2PSZ128rm addr:$src)>;
8587}
8588
// Single->half conversion (vcvtps2ph). $src2 is the rounding-control
// immediate. Register forms come in unmasked (rr), merge-masked (rrk,
// tied to $src0) and zero-masked (rrkz) variants; the store forms (mr,
// mrk) are assembler/encoding only (no ISel pattern, mayStore).
8589multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8590                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8591let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8592  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8593             (ins _src.RC:$src1, i32u8imm:$src2),
8594             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8595             [(set _dest.RC:$dst,
8596                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8597             Sched<[RR]>;
8598  let Constraints = "$src0 = $dst" in
8599  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8600             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8601             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8602             [(set _dest.RC:$dst,
8603                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8604                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8605             Sched<[RR]>, EVEX_K;
8606  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8607             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8608             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8609             [(set _dest.RC:$dst,
8610                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8611                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8612             Sched<[RR]>, EVEX_KZ;
8613  let hasSideEffects = 0, mayStore = 1 in {
8614    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8615               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8616               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8617               Sched<[MR]>;
8618    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8619               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8620               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8621                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8622  }
8623}
8624}
8625
// SAE variant of vcvtps2ph. Assembler-match only (empty pattern list via
// AVX512_maskable_in_asm); reads MXCSR but cannot raise FP exceptions.
8626multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8627                               SchedWrite Sched> {
8628  let hasSideEffects = 0, Uses = [MXCSR] in
8629  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8630                   (outs _dest.RC:$dst),
8631                   (ins _src.RC:$src1, i32u8imm:$src2),
8632                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8633                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8634}
8635
// 512-bit vcvtps2ph (v16f32 -> v16i16), its SAE variant, and a pattern
// folding the conversion directly into a store (selects the mr form).
8636let Predicates = [HasAVX512] in {
8637  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8638                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8639                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8640                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8641
8642  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8643            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8644}
8645
// 256/128-bit vcvtps2ph under AVX512VL, plus store-folding patterns.
8646let Predicates = [HasVLX] in {
8647  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8648                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8649                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8650  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8651                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
8652                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8653
  // The 128-bit conversion yields 4 x f16 in the low 64 bits of the xmm
  // result; match stores of that low 64-bit half extracted as f64 or i64.
8654  def : Pat<(store (f64 (extractelt
8655                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8656                         (iPTR 0))), addr:$dst),
8657            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8658  def : Pat<(store (i64 (extractelt
8659                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8660                         (iPTR 0))), addr:$dst),
8661            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8662  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8663            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8664}
8665
// Unordered/ordered scalar FP compare with SAE, setting EFLAGS.
// Assembler/encoding only: no ISel pattern ([]), hasSideEffects = 0.
8667multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8668                            string OpcodeStr, Domain d,
8669                            X86FoldableSchedWrite sched = WriteFComX> {
8670  let hasSideEffects = 0, Uses = [MXCSR] in
8671  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8672                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8673                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8674}
8675
// SAE forms of v(u)comiss/v(u)comisd; all write EFLAGS.
8676let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8677  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8678                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8679  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8680                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8681  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8682                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8683  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8684                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8685}
8686
// Non-SAE EVEX scalar compares, reusing the SSE multiclasses.
// ucomi uses the quiet (X86any_fcmp) node, comi the signaling
// (X86strict_fcmps) node. The isCodeGenOnly variants take VR128X
// operands — presumably the intrinsic forms; verify against the
// sse12_ord_cmp_int definition.
8687let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8688  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8689                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8690                                 EVEX_CD8<32, CD8VT1>;
8691  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8692                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
8693                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8694  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8695                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8696                                 EVEX_CD8<32, CD8VT1>;
8697  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8698                                 "comisd", SSEPackedDouble>, PD, EVEX,
8699                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8700  let isCodeGenOnly = 1 in {
8701    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8702                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8703                          EVEX_CD8<32, CD8VT1>;
8704    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8705                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8706                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8707
8708    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8709                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8710                          EVEX_CD8<32, CD8VT1>;
8711    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8712                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8713                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8714  }
8715}
8716
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Scalar approximation instructions: register-register and
/// register-memory (intrinsic scalar memory operand) forms.
8718multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8719                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8720  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8721  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8722                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8723                           "$src2, $src1", "$src1, $src2",
8724                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8725                           EVEX_4V, VEX_LIG, Sched<[sched]>;
8726  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8727                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8728                         "$src2, $src1", "$src1, $src2",
8729                         (OpNode (_.VT _.RC:$src1),
8730                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
8731                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8732}
8733}
8734
// Scalar vrcp14ss/sd and vrsqrt14ss/sd instantiations for f32/f64.
8735defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8736                               f32x_info>, EVEX_CD8<32, CD8VT1>,
8737                               T8PD;
8738defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8739                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8740                               T8PD;
8741defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8742                                 SchedWriteFRsqrt.Scl, f32x_info>,
8743                                 EVEX_CD8<32, CD8VT1>, T8PD;
8744defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8745                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8746                                 EVEX_CD8<64, CD8VT1>, T8PD;
8747
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Packed approximation instructions: register (r), folded-load (m) and
/// broadcast-load (mb) forms, all via AVX512_maskable.
8748multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8749                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8750  let ExeDomain = _.ExeDomain in {
8751  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8752                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8753                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8754                         Sched<[sched]>;
8755  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8756                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8757                         (OpNode (_.VT
8758                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8759                         Sched<[sched.Folded, sched.ReadAfterFold]>;
8760  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8761                          (ins _.ScalarMemOp:$src), OpcodeStr,
8762                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8763                          (OpNode (_.VT
8764                            (_.BroadcastLdFrag addr:$src)))>,
8765                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8766  }
8767}
8769
// Instantiate packed fp14 approximation ops: ps/pd at 512 bits always,
// and 128/256-bit variants when AVX512VL is available.
8770let Uses = [MXCSR] in
8771multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8772                                X86SchedWriteWidths sched> {
8773  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8774                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8775  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8776                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8777
8778  // Define only if AVX512VL feature is present.
8779  let Predicates = [HasVLX] in {
8780    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8781                                OpNode, sched.XMM, v4f32x_info>,
8782                               EVEX_V128, EVEX_CD8<32, CD8VF>;
8783    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8784                                OpNode, sched.YMM, v8f32x_info>,
8785                               EVEX_V256, EVEX_CD8<32, CD8VF>;
8786    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8787                                OpNode, sched.XMM, v2f64x_info>,
8788                               EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8789    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8790                                OpNode, sched.YMM, v4f64x_info>,
8791                               EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8792  }
8793}
8794
// Packed vrsqrt14* and vrcp14* across all vector widths.
8795defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8796defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8797
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Scalar forms: r (reg), rb (reg with {sae}, no FP exception — note the
/// SIMD_EXC marker is deliberately absent there), and m (folded load).
8798multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8799                         SDNode OpNode, SDNode OpNodeSAE,
8800                         X86FoldableSchedWrite sched> {
8801  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8802  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8803                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8804                           "$src2, $src1", "$src1, $src2",
8805                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8806                           Sched<[sched]>, SIMD_EXC;
8807
8808  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8809                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8810                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8811                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8812                            EVEX_B, Sched<[sched]>;
8813
8814  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8815                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8816                         "$src2, $src1", "$src1, $src2",
8817                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
8818                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8819  }
8820}
8822
// Instantiate a scalar ERI-style op for both f32 (ss) and f64 (sd).
8823multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8824                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8825  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8826                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8827  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8828                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8829}
8830
// Scalar vrcp28/vrsqrt28 require AVX-512 ER; scalar vgetexp does not.
8831let Predicates = [HasERI] in {
8832  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8833                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8834  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8835                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8836}
8837
8838defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8839                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Packed forms: r (reg), m (folded load) and mb (broadcast load).
8841
8842multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8843                         SDNode OpNode, X86FoldableSchedWrite sched> {
8844  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8845  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8846                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8847                         (OpNode (_.VT _.RC:$src))>,
8848                         Sched<[sched]>;
8849
8850  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8851                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8852                         (OpNode (_.VT
8853                             (bitconvert (_.LdFrag addr:$src))))>,
8854                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8855
8856  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8857                         (ins _.ScalarMemOp:$src), OpcodeStr,
8858                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8859                         (OpNode (_.VT
8860                                  (_.BroadcastLdFrag addr:$src)))>,
8861                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8862  }
8863}
// {sae} register variant of the packed fp28 ops; reads MXCSR only and
// does not set mayRaiseFPException.
8864multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8865                         SDNode OpNode, X86FoldableSchedWrite sched> {
8866  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8867  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8868                        (ins _.RC:$src), OpcodeStr,
8869                        "{sae}, $src", "$src, {sae}",
8870                        (OpNode (_.VT _.RC:$src))>,
8871                        EVEX_B, Sched<[sched]>;
8872}
8873
// Instantiate a packed ERI-style op: 512-bit ps/pd, each with both the
// normal and the SAE variant.
8874multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8875                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8876   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8877              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8878              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8879   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8880              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8881              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8882}
8883
// 128/256-bit (AVX512VL) instantiations of a packed unary FP op,
// without SAE variants (SAE is 512-bit only).
8884multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8885                                  SDNode OpNode, X86SchedWriteWidths sched> {
8886  // Define only if AVX512VL feature is present.
8887  let Predicates = [HasVLX] in {
8888    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8889                                sched.XMM>,
8890                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8891    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8892                                sched.YMM>,
8893                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8894    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8895                                sched.XMM>,
8896                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8897    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8898                                sched.YMM>,
8899                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8900  }
8901}
8902
// Packed ERI instructions (AVX-512 ER only): vrsqrt28, vrcp28, vexp2.
// vgetexp is base AVX-512 and additionally gets 128/256-bit VL forms.
8903let Predicates = [HasERI] in {
8904 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8905                            SchedWriteFRsqrt>, EVEX;
8906 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8907                            SchedWriteFRcp>, EVEX;
8908 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8909                            SchedWriteFAdd>, EVEX;
8910}
8911defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8912                            SchedWriteFRnd>,
8913                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8914                                          SchedWriteFRnd>, EVEX;
8915
// Packed sqrt with an explicit embedded rounding-control operand ($rc);
// register form only (EVEX_B + EVEX_RC encode the rounding mode).
8916multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8917                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8918  let ExeDomain = _.ExeDomain in
8919  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8920                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8921                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8922                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8923}
8924
// Packed sqrt: register (r), folded-load (m) and broadcast-load (mb)
// forms. Split patterns pair the strict-capable any_fsqrt node with the
// plain fsqrt node for each form.
8925multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8926                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8927  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8928  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
8929                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8930                         (_.VT (any_fsqrt _.RC:$src)),
8931                         (_.VT (fsqrt _.RC:$src))>, EVEX,
8932                         Sched<[sched]>;
8933  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8934                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8935                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
8936                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
8937                         Sched<[sched.Folded, sched.ReadAfterFold]>;
8938  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8939                          (ins _.ScalarMemOp:$src), OpcodeStr,
8940                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8941                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
8942                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
8943                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8944  }
8945}
8946
// Instantiate packed sqrt: ps/pd at 512 bits, plus 128/256-bit forms
// when AVX512VL is available.
8947let Uses = [MXCSR], mayRaiseFPException = 1 in
8948multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8949                                  X86SchedWriteSizes sched> {
8950  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8951                                sched.PS.ZMM, v16f32_info>,
8952                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8953  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8954                                sched.PD.ZMM, v8f64_info>,
8955                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8956  // Define only if AVX512VL feature is present.
8957  let Predicates = [HasVLX] in {
8958    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8959                                     sched.PS.XMM, v4f32x_info>,
8960                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8961    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8962                                     sched.PS.YMM, v8f32x_info>,
8963                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8964    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8965                                     sched.PD.XMM, v2f64x_info>,
8966                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8967    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8968                                     sched.PD.YMM, v4f64x_info>,
8969                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8970  }
8971}
8972
// Embedded-rounding packed sqrt exists only at 512 bits.
8973let Uses = [MXCSR] in
8974multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8975                                        X86SchedWriteSizes sched> {
8976  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8977                                      sched.PS.ZMM, v16f32_info>,
8978                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8979  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8980                                      sched.PD.ZMM, v8f64_info>,
8981                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8982}
8983
// Scalar sqrt. *_Int forms take xmm operands and select from the X86fsqrts
// nodes; rb_Int adds an embedded rounding-control operand. The
// isCodeGenOnly FRC-register forms have no patterns themselves — the
// trailing Pats select them for plain scalar any_fsqrt, with the folded
// load form restricted to OptForSize.
8984multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8985                              X86VectorVTInfo _, string Name> {
8986  let ExeDomain = _.ExeDomain in {
8987    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8988                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8989                         "$src2, $src1", "$src1, $src2",
8990                         (X86fsqrts (_.VT _.RC:$src1),
8991                                    (_.VT _.RC:$src2))>,
8992                         Sched<[sched]>, SIMD_EXC;
8993    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8994                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8995                         "$src2, $src1", "$src1, $src2",
8996                         (X86fsqrts (_.VT _.RC:$src1),
8997                                    (_.ScalarIntMemFrags addr:$src2))>,
8998                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8999    let Uses = [MXCSR] in
9000    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9001                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9002                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9003                         (X86fsqrtRnds (_.VT _.RC:$src1),
9004                                     (_.VT _.RC:$src2),
9005                                     (i32 timm:$rc))>,
9006                         EVEX_B, EVEX_RC, Sched<[sched]>;
9007
9008    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
9009      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9010                (ins _.FRC:$src1, _.FRC:$src2),
9011                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9012                Sched<[sched]>, SIMD_EXC;
9013      let mayLoad = 1 in
9014        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9015                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9016                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9017                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9018    }
9019  }
9020
9021  let Predicates = [HasAVX512] in {
9022    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9023              (!cast<Instruction>(Name#Zr)
9024                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9025  }
9026
9027  let Predicates = [HasAVX512, OptForSize] in {
9028    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9029              (!cast<Instruction>(Name#Zm)
9030                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9031  }
9032}
9033
// Instantiate scalar sqrt for f32 (ss, XS prefix) and f64 (sd, XD + W).
9034multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9035                                  X86SchedWriteSizes sched> {
9036  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9037                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9038  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9039                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9040}
9041
// vsqrt: packed (all widths + 512-bit rounding forms) and scalar forms.
9042defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9043             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9044
9045defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9046
// VRNDSCALE scalar (ss/sd): round the low element of $src2 under control of
// the imm8 $src3, passing the upper elements through from $src1.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  // Maskable register form of the intrinsic pattern.
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 timm:$src3)))>,
                           Sched<[sched]>, SIMD_EXC;

  // {sae} (suppress-all-exceptions) form, marked EVEX_B; reads MXCSR.
  let Uses = [MXCSR] in
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

  // Maskable memory form of the intrinsic pattern.
  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // CodeGen-only FRC (scalar FP register class) forms, pattern-less; they are
  // selected by the Pat definitions below.
  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>, Sched<[sched]>, SIMD_EXC;

    let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
  }

  // Select the CodeGen-only register form for plain scalar rndscale nodes.
  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, timm:$src2))>;
  }

  // Fold a scalar load into the instruction only when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, timm:$src2))>;
  }
}
9099
// VRNDSCALESS/VRNDSCALESD: scalar round-to-integral with imm8 control.
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;
9109
// Fold a masked scalar op - a select between the OpNode result and a
// passthru/zero value on the low element, merged back via a Move node - into
// the corresponding merge-masked (_Intk) or zero-masked (_Intkz) instruction.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masking: the false lane comes from $dst's low element.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    // Zero-masking: the false lane is the FP zero immediate.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
9127
// Masked scalar sqrt patterns for f32/f64. The GPR mask is truncated to i8
// and copied into a VK1WM register for the instruction's mask operand.
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9134
9135
9136//-------------------------------------------------
9137// Integer truncate and extend operations
9138//-------------------------------------------------
9139
// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect_mask node:$mask,
                                         (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect_mask node:$mask,
                                          (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect_mask node:$mask,
                                           (X86vtruncus node:$src), node:$src0)>;
9152
// Common VPMOV* truncate forms: unmasked (rr), merge-masked (rrk) and
// zero-masked (rrkz) register variants plus pattern-less store variants.
// These use MRMDest* formats: the narrow destination is in the ModRM r/m
// field.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDPatternOperator MaskNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  // Merge-masking ties the passthru operand $src0 to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  // Truncating-store forms carry no patterns here; avx512_trunc_mr_lowering
  // provides the selection patterns for them.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}
9194
// Selection patterns mapping plain and masked truncating stores onto the
// pattern-less mr/mrk store forms created by avx512_trunc_common.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  // Unmasked truncating store -> mr form.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                                    addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store -> mrk form.
  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
9209
// Instantiate a truncate for all three vector widths. Each width can use a
// different SDNode/mask-node because narrower sources may only define part of
// the 128-bit result (in-vector variants). 128/256-bit forms need VLX.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                             truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
9240
// qword -> byte truncate. Every source width yields at most 8 bytes of a
// v16i8 result, so all three widths use the in-vector nodes.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
9252
// qword -> word truncate. Only the 512-bit source fills a full v8i16, so
// only that width uses the plain node; 128/256 use the in-vector node.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
9264
// qword -> dword truncate. 256/512-bit sources fill their result vectors
// (plain node); the 128-bit source only defines 2 of 4 lanes (in-vector).
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
9276
// dword -> byte truncate. Only the 512-bit source fills a full v16i8
// (plain node); 128/256 use the in-vector node.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
9288
// dword -> word truncate. 256/512-bit sources fill their result vectors
// (plain node); the 128-bit source only defines 4 of 8 lanes (in-vector).
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
9300
// word -> byte truncate; requires BWI (passed as the prd predicate).
// 256/512-bit sources use the plain node, the 128-bit source the in-vector
// node.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
9312
// VPMOV* truncate instantiations: plain (vpmov), signed saturating (vpmovs)
// and unsigned saturating (vpmovus) variants for each width combination.

// qword -> byte
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// qword -> word
defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

// qword -> dword
defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi32,
                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi32,
                                  masked_truncstore_s_vi32, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32,
                                  X86vtruncus, X86vmtruncus>;

// dword -> byte
defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// dword -> word
defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

// word -> byte (BWI)
defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                  WriteShuffle256, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc,
                                  X86vmtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                  WriteShuffle256, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                  select_truncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;
9385
// Without VLX the 256-bit truncate instructions are unavailable: widen the
// source into a 512-bit register, use the Z-width instruction, and extract
// the low 128 bits of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}

// Same widening trick for the word->byte truncate, which needs BWI.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
9402
// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  // Merge-masked truncate -> rrk form.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  // Zero-masked truncate -> rrkz form.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}
9420
// vmtrunc lowerings for the 256-bit dword->word truncates (VLX).
let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

// vmtrunc lowerings for the 512-bit truncates with i8/i16 results.
let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
9440
// Common reg and load forms for a VPMOVSX/VPMOVZX-style extend: a maskable
// register form matching OpNode and a maskable load form matching LdFrag.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}
9456
// byte -> word extend (requires BWI). The 128-bit form only consumes the
// low 8 source bytes, so it matches the in-vector node.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9475
// byte -> dword extend. The 128/256-bit forms consume only part of the
// v16i8 source (in-vector node); the 512-bit form consumes all 16 bytes.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9494
// byte -> qword extend. Every width consumes fewer than 16 source bytes,
// so all three forms use the in-vector node.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9513
// word -> dword extend. The 128-bit form consumes only the low 4 words
// (in-vector node); 256/512-bit forms consume a full source vector.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9532
// word -> qword extend. 128/256-bit forms consume only part of the v8i16
// source (in-vector node); the 512-bit form consumes all 8 words.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9551
// dword -> qword extend. The 128-bit form consumes only the low 2 dwords
// (in-vector node); 256/512-bit forms consume a full source vector.
// Note: no VEX_WIG here, unlike the byte/word-source variants above.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
9571
// Instantiate the zero-extend (VPMOVZX*) and sign-extend (VPMOVSX*)
// instructions for every source/destination element-width combination.
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9585
9586
// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
// Folds full-width extending loads into the rm forms of the extend
// instructions.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
9622
// Folds a *partial-width* scalar load into the in-vector extend form
// (InVecOp = sext_invec/zext_invec): the load supplies only the low lanes
// consumed by the extend, reached through a bitcast of a scalar_to_vector
// or of a zero-extending vector load (X86vzload32/64).  Also pulls in the
// full-width load patterns from AVX512_pmovx_patterns_base.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  // Byte->qword only consumes 2 bytes; an anyext 16-bit load is enough.
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // FIX: the f64-load variant previously wrapped the load in a *v2i64*
  // scalar_to_vector, unlike every other loadf64 pattern in this multiclass
  // (which use v2f64).  With the mismatched element type this pattern
  // appears unreachable for the DAG shape it targets; use v2f64 for
  // consistency with the 128/512-bit blocks.
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // FIX: same v2i64 -> v2f64 correction for the f64-load WQ variant.
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}
9693
// Instantiate the partial-load fold patterns for both extend flavors.
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
// Workaround: widen v16i16 to v16i32 with VPMOVZXWD, then narrow straight to
// v16i8 with VPMOVDB (both available without BWI).
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
9706
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// Single gather instruction: masked load through a vector-index memory
// operand.  Emits two results: the gathered vector (tied to $src1 for merge
// semantics, and @earlyclobber per the ISA requirement that dst/index/mask
// be distinct) and the written-back mask register $mask_wb.  MaskRC defaults
// to the write-mask class matching the element count.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
9721
// 64-bit-element gathers (pd/q data): 512/256/128-bit variants for both the
// dword-indexed ("d", dopc) and qword-indexed ("q", qopc) forms.  The index
// memop width tracks the element count, e.g. 8 x 64-bit data with dword
// indices needs only a YMM index register (vy512xmem).  VEX_W selects the
// 64-bit element size in the EVEX encoding.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>, EVEX_V512, VEX_W;
// Narrower variants require AVX512VL.
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                              vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                              vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                              vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                              vx128xmem>, EVEX_V128, VEX_W;
}
}
9739
// 32-bit-element gathers (ps/d data).  For the qword-indexed forms the index
// vector has as many lanes as the data, so each qword-indexed variant gathers
// half as many 32-bit elements: data info is one step narrower than the
// vector length (e.g. Z uses _.info256 with a ZMM index).  The 128-bit
// qword-indexed form only gathers 2 elements, hence the explicit VK2WM mask.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                       EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                       EVEX_V512;
// Narrower variants require AVX512VL.
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx64xmem, VK2WM>, EVEX_V128;
}
}
9757
9758
// FP gathers (VGATHERDPS/QPS/DPD/QPD) and integer gathers (VPGATHERDD/QD/DQ/QQ).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9764
// Single scatter instruction: masked store through a vector-index memory
// operand.  Mirrors avx512_gather but in the store direction; the only
// register output is the written-back mask $mask_wb (tied to $mask).
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
9778
// 64-bit-element scatters (pd/q data); same index-width scheme as
// avx512_gather_q_pd (dword indices for 512-bit data fit in a YMM index).
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>, EVEX_V512, VEX_W;
// Narrower variants require AVX512VL.
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                              vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                              vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                              vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                              vx128xmem>, EVEX_V128, VEX_W;
}
}
9796
// 32-bit-element scatters (ps/d data); same narrowing scheme for
// qword-indexed forms as avx512_gather_d_ps (half as many elements per
// vector length, VK2WM for the 2-element XMM qword form).
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                       EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                       EVEX_V512;
// Narrower variants require AVX512VL.
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                          vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                          vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                          vx64xmem, VK2WM>, EVEX_V128;
}
}
9814
// FP scatters (VSCATTERDPS/QPS/DPD/QPD) and integer scatters (VPSCATTERDD/QD/DQ/QQ).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9820
// prefetch
// Gather/scatter prefetch (AVX512PF): takes only a mask and a vector-index
// memory operand; no data register.  Marked mayLoad and mayStore since the
// hint may be for either direction.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}
9829
// PF0 = prefetch into T0 hint, PF1 = T1 hint; gather variants use opcode
// 0xC6/0xC7 with /1 or /2, scatter variants with /5 or /6.  Index/mask
// widths follow the same element-count rules as the real gathers/scatters.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9877
// Mask-to-vector move (VPMOVM2*): sign-extends each mask bit into a full
// vector element (all-ones / all-zeros lanes).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}
9884
// Instantiates the mask-to-vector move at all three vector lengths; the
// 512-bit form needs only `prd`, the narrower forms additionally need VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
9895
// VPMOVM2B/W require BWI; VPMOVM2D/Q require DQI.  VEX_W selects the
// wider element size within each opcode pair.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9900
// Vector-to-mask move (VPMOVB2M etc.): extracts the sign bit of every
// element, matched as 0 > x (X86pcmpgtm with an all-zeros LHS).
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
9907
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens the source into a 512-bit register (INSERT_SUBREG into an
// IMPLICIT_DEF, upper lanes undefined), runs the Zrr form, then copies the
// resulting mask down to the narrow mask class; only the low NumElts mask
// bits are meaningful.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
9920
// Instantiates vector-to-mask moves at all vector lengths.  With VLX the
// native 256/128-bit instructions are used; without VLX the *_Alt lowerings
// widen to the 512-bit instruction instead.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
9938
// VPMOVB2M/W2M require BWI; VPMOVD2M/Q2M require DQI.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
9947
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  // Materialize the mask into dword lanes (VPMOVM2D), then narrow with
  // VPMOVDB / VPMOVDW.
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
9962
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Register and store forms of one compress instruction.  The rr form is
// declared with null_frag; selection happens through the X86compress /
// X86mCompressingStore patterns in compress_by_vec_width_lowering.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  // Masked (compressing) store form.
  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}
9988
// Selection patterns for compress: compressing store (mrk), merge-masked
// register form (rrk, passthru in $src0), and zero-masked form (rrkz).
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}
10001
// Instantiates compress instructions + lowering patterns at all three
// vector lengths (Z always under Pred; Z256/Z128 additionally need VLX).
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
10017
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer (VPCOMPRESSD/Q, opcode 0x8B) and FP (VCOMPRESSPS/PD, opcode 0x8A)
// compress; VEX_W selects the 64-bit element variants.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
10027
// expand
// Register and load forms of one expand instruction; both use null_frag and
// are selected via the X86expand / X86mExpandingLoad patterns in
// expand_by_vec_width_lowering.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10042
// Selection patterns for expand.  An expanding load with an undef or
// all-zeros passthru maps to the zero-masked load (rmkz); with a register
// passthru to the merge-masked load (rmk).  Register X86expand maps to
// rrk/rrkz analogously.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}
10065
// Instantiates expand instructions + lowering patterns at all three vector
// lengths (same structure as compress_by_elt_width).
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
10081
// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer (VPEXPANDD/Q, opcode 0x89) and FP (VEXPANDPS/PD, opcode 0x88)
// expand; VEX_W selects the 64-bit element variants.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
10091
//handle instruction  reg_vec1 = op(reg_vec,imm)
//                               op(mem_vec,imm)
//                               op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Unary FP op with an 8-bit immediate (register, folded-load, and
// element-broadcast forms).  OpNode builds the unmasked pattern, MaskOpNode
// the masked one; all forms read MXCSR and may raise FP exceptions.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10126
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// {sae} (suppress-all-exceptions) variant of the unary FP+imm op; register
// form only, with EVEX.b set to select SAE.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}
10140
// Instantiates the unary FP+imm op at all vector lengths; only the 512-bit
// form gets the additional {sae} variant.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
            Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}
10158
//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Binary FP op with an 8-bit immediate (register, folded-load, and
// element-broadcast forms); reads MXCSR and may raise FP exceptions.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10190
10191//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10192//                               op(reg_vec2,mem_vec,imm)
// Integer two-source + imm8 forms. DestInfo and SrcInfo may describe
// different element types (e.g. vdbpsadbw: i16 result from i8 sources).
10193multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10194                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10195                              X86VectorVTInfo SrcInfo>{
10196  let ExeDomain = DestInfo.ExeDomain in {
  // reg, reg, imm form.
10197  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10198                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10199                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10200                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10201                               (SrcInfo.VT SrcInfo.RC:$src2),
10202                               (i8 timm:$src3)))>,
10203                  Sched<[sched]>;
  // reg, mem, imm form: full-width load folded into src2.
10204  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10205                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10206                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10207                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10208                             (SrcInfo.VT (bitconvert
10209                                                (SrcInfo.LdFrag addr:$src2))),
10210                             (i8 timm:$src3)))>,
10211                Sched<[sched.Folded, sched.ReadAfterFold]>;
10212  }
10213}
10214
10215//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10216//                               op(reg_vec2,mem_vec,imm)
10217//                               op(reg_vec2,broadcast(eltVt),imm)
// Extends avx512_3Op_rm_imm8 (same VT for source and dest) with the
// broadcast-from-memory form.
10218multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10219                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10220  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10221
  // reg, broadcast-mem, imm form (EVEX.b set).
10222  let ExeDomain = _.ExeDomain in
10223  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10224                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10225                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10226                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10227                    (OpNode (_.VT _.RC:$src1),
10228                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10229                            (i8 timm:$src3))>, EVEX_B,
10230                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10231}
10232
10233//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10234//                                      op(reg_vec2,mem_scalar,imm)
// Scalar FP two-source + imm8 forms; both read MXCSR and may raise FP
// exceptions.
10235multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10236                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10237  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // reg, reg, imm form.
10238  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10239                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10240                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10241                      (OpNode (_.VT _.RC:$src1),
10242                              (_.VT _.RC:$src2),
10243                              (i32 timm:$src3))>,
10244                      Sched<[sched]>;
  // reg, scalar-mem, imm form: scalar load folded into src2.
10245  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10246                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10247                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10248                    (OpNode (_.VT _.RC:$src1),
10249                            (_.ScalarIntMemFrags addr:$src2),
10250                            (i32 timm:$src3))>,
10251                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10252  }
10253}
10254
10255//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Packed suppress-all-exceptions (SAE) variant: register-only, EVEX.b
// selects {sae} instead of broadcast for register forms.
10256multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10257                                    SDNode OpNode, X86FoldableSchedWrite sched,
10258                                    X86VectorVTInfo _> {
10259  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10260  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10261                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10262                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10263                      "$src1, $src2, {sae}, $src3",
10264                      (OpNode (_.VT _.RC:$src1),
10265                              (_.VT _.RC:$src2),
10266                              (i32 timm:$src3))>,
10267                      EVEX_B, Sched<[sched]>;
10268}
10269
10270//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar SAE variant; register-only, EVEX.b encodes {sae}.
10271multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10272                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10273  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10274  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10275                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10276                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10277                      "$src1, $src2, {sae}, $src3",
10278                      (OpNode (_.VT _.RC:$src1),
10279                              (_.VT _.RC:$src2),
10280                              (i32 timm:$src3))>,
10281                      EVEX_B, Sched<[sched]>;
10282}
10283
// Instantiates packed + SAE forms for 512-bit under 'prd', and the
// packed-only 128/256-bit forms under [prd, HasVLX] (SAE is 512-bit only).
10284multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10285            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10286            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10287  let Predicates = [prd] in {
10288    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10289                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10290                                  EVEX_V512;
10291
10292  }
10293  let Predicates = [prd, HasVLX] in {
10294    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10295                                  EVEX_V128;
10296    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10297                                  EVEX_V256;
10298  }
10299}
10300
// Instantiates avx512_3Op_rm_imm8 at all three vector widths; 128/256-bit
// additionally require HasVLX.
10301multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10302                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10303                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10304  let Predicates = [Pred] in {
10305    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10306                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10307  }
10308  let Predicates = [Pred, HasVLX] in {
10309    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10310                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10311    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10312                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10313  }
10314}
10315
// Instantiates avx512_3Op_imm8 (reg/mem/broadcast + imm8) at all three
// vector widths; 128/256-bit additionally require HasVLX.
10316multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10317                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10318                                  Predicate Pred = HasAVX512> {
10319  let Predicates = [Pred] in {
10320    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10321                                EVEX_V512;
10322  }
10323  let Predicates = [Pred, HasVLX] in {
10324    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10325                                EVEX_V128;
10326    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10327                                EVEX_V256;
10328  }
10329}
10330
// Scalar counterpart: instantiates the normal and SAE scalar imm forms
// (scalar ops use the XMM scheduling class only).
10331multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10332                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10333                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10334  let Predicates = [prd] in {
10335     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10336              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10337  }
10338}
10339
// Instantiates the PS (f32) and PD (f64) flavors of a unary packed FP + imm
// operation; PD gets VEX.W and 64-bit CD8 scaling.
10340multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10341                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10342                    SDNode MaskOpNode, SDNode OpNodeSAE,
10343                    X86SchedWriteWidths sched, Predicate prd>{
10344  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10345                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10346                            EVEX_CD8<32, CD8VF>;
10347  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10348                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10349                            EVEX_CD8<64, CD8VF>, VEX_W;
10350}
10351
// Packed unary + imm8 instructions: VREDUCE (DQI), VRNDSCALE, VGETMANT.
10352defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10353                              X86VReduce, X86VReduce, X86VReduceSAE,
10354                              SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
10355defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10356                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10357                              SchedWriteFRnd, HasAVX512>,
10358                              AVX512AIi8Base, EVEX;
10359defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10360                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
10361                              SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
10362
// Packed two-source + imm8: VRANGEPD/VRANGEPS (DQI).
10363defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10364                                                0x50, X86VRange, X86VRangeSAE,
10365                                                SchedWriteFAdd, HasDQI>,
10366      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10367defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10368                                                0x50, X86VRange, X86VRangeSAE,
10369                                                SchedWriteFAdd, HasDQI>,
10370      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10371
// Scalar two-source + imm8 counterparts: VRANGE/VREDUCE/VGETMANT scalar forms.
10372defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10373      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10374      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10375defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10376      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10377      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10378
10379defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10380      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10381      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10382defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10383      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10384      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10385
10386defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10387      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10388      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10389defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10390      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10391      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10392
// 128-bit-lane shuffle (vshuff/i32x4/64x2) forms for one vector width.
// The X86Shuf128 node is matched in CastInfo's VT and bitconverted to the
// instruction's own VT so masking applies with the right element size.
10393multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10394                                          X86FoldableSchedWrite sched,
10395                                          X86VectorVTInfo _,
10396                                          X86VectorVTInfo CastInfo,
10397                                          string EVEX2VEXOvrd> {
10398  let ExeDomain = _.ExeDomain in {
  // reg, reg, imm form; may convert to the VEX VPERM2F/I128 named by
  // EVEX2VEXOvrd.
10399  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10400                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10401                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10402                  (_.VT (bitconvert
10403                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10404                                                  (i8 timm:$src3)))))>,
10405                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // reg, mem, imm form.
10406  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10407                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10408                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10409                (_.VT
10410                 (bitconvert
10411                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
10412                                           (CastInfo.LdFrag addr:$src2),
10413                                           (i8 timm:$src3)))))>,
10414                Sched<[sched.Folded, sched.ReadAfterFold]>,
10415                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // reg, broadcast-mem, imm form (EVEX.b set).
10416  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10417                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10418                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10419                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10420                    (_.VT
10421                     (bitconvert
10422                      (CastInfo.VT
10423                       (X86Shuf128 _.RC:$src1,
10424                                   (_.BroadcastLdFrag addr:$src2),
10425                                   (i8 timm:$src3)))))>, EVEX_B,
10426                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10427  }
10428}
10429
// Instantiates the 128-bit-lane shuffle at 512 and 256 bits (no 128-bit
// form exists). The 512-bit variant passes an empty EVEX2VEX override name;
// only the 256-bit variant can convert to the VEX encoding.
10430multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10431                                   AVX512VLVectorVTInfo _,
10432                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10433                                   string EVEX2VEXOvrd>{
10434  let Predicates = [HasAVX512] in
10435  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10436                                          _.info512, CastInfo.info512, "">, EVEX_V512;
10437
10438  let Predicates = [HasAVX512, HasVLX] in
10439  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10440                                             _.info256, CastInfo.info256,
10441                                             EVEX2VEXOvrd>, EVEX_V256;
10442}
10443
// Lane shuffles. The FP 32x4 and integer 32x4 variants cast through the
// 64-bit-element info so X86Shuf128 always operates on 128-bit-lane units.
10444defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10445      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10446defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10447      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10448defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10449      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10450defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10451      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10452
// VALIGND/VALIGNQ forms for one vector width: concatenate src2:src1 and
// extract a vector shifted right by imm8 elements.
10453multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10454                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10455  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10456  // instantiation of this class.
10457  let ExeDomain = _.ExeDomain in {
  // reg, reg, imm form; 128-bit may convert to VEX VPALIGNR.
10458  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10459                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10460                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10461                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10462                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  // reg, mem, imm form.
10463  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10464                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10465                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10466                (_.VT (X86VAlign _.RC:$src1,
10467                                 (bitconvert (_.LdFrag addr:$src2)),
10468                                 (i8 timm:$src3)))>,
10469                Sched<[sched.Folded, sched.ReadAfterFold]>,
10470                EVEX2VEXOverride<"VPALIGNRrmi">;
10471
  // reg, broadcast-mem, imm form (EVEX.b set).
10472  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10473                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10474                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10475                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
10476                   (X86VAlign _.RC:$src1,
10477                              (_.VT (_.BroadcastLdFrag addr:$src2)),
10478                              (i8 timm:$src3))>, EVEX_B,
10479                   Sched<[sched.Folded, sched.ReadAfterFold]>;
10480  }
10481}
10482
// Instantiates VALIGN at all three widths. The 256-bit variant clears the
// EVEX2VEX override since there is no 256-bit VEX VPALIGNR equivalent with
// these semantics.
10483multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10484                                AVX512VLVectorVTInfo _> {
10485  let Predicates = [HasAVX512] in {
10486    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10487                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
10488  }
10489  let Predicates = [HasAVX512, HasVLX] in {
10490    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10491                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
10492    // We can't really override the 256-bit version so change it back to unset.
10493    let EVEX2VEXOverride = ? in
10494    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10495                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
10496  }
10497}
10498
// VALIGND/VALIGNQ (dword/qword element alignment) and byte-granular
// VPALIGNR (per-128-bit-lane, BWI).
10499defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10500                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10501defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10502                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10503                                   VEX_W;
10504
10505defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10506                                         SchedWriteShuffle, avx512vl_i8_info,
10507                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10508
10509// Fragments to help convert valignq into masked valignd. Or valignq/valignd
10510// into vpalignr.
// valignq imm -> valignd imm: each qword is 2 dwords, so scale by 2.
10511def ValignqImm32XForm : SDNodeXForm<timm, [{
10512  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10513}]>;
// valignq imm -> vpalignr imm: each qword is 8 bytes, so scale by 8.
10514def ValignqImm8XForm : SDNodeXForm<timm, [{
10515  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10516}]>;
// valignd imm -> vpalignr imm: each dword is 4 bytes, so scale by 4.
10517def ValigndImm8XForm : SDNodeXForm<timm, [{
10518  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10519}]>;
10520
// Patterns that re-select a masked VAlign done in 'From' element type as the
// instruction named OpcodeStr operating in 'To' element type, rescaling the
// immediate with ImmXForm. Covers merge-masked and zero-masked, reg and mem
// operands.
10521multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10522                                        X86VectorVTInfo From, X86VectorVTInfo To,
10523                                        SDNodeXForm ImmXForm> {
  // Merge-masked, register operands.
10524  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10525                                 (bitconvert
10526                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10527                                                   timm:$src3))),
10528                                 To.RC:$src0)),
10529            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10530                                                  To.RC:$src1, To.RC:$src2,
10531                                                  (ImmXForm timm:$src3))>;
10532
  // Zero-masked, register operands.
10533  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10534                                 (bitconvert
10535                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10536                                                   timm:$src3))),
10537                                 To.ImmAllZerosV)),
10538            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10539                                                   To.RC:$src1, To.RC:$src2,
10540                                                   (ImmXForm timm:$src3))>;
10541
  // Merge-masked, memory second operand.
10542  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10543                                 (bitconvert
10544                                  (From.VT (OpNode From.RC:$src1,
10545                                                   (From.LdFrag addr:$src2),
10546                                           timm:$src3))),
10547                                 To.RC:$src0)),
10548            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10549                                                  To.RC:$src1, addr:$src2,
10550                                                  (ImmXForm timm:$src3))>;
10551
  // Zero-masked, memory second operand.
10552  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10553                                 (bitconvert
10554                                  (From.VT (OpNode From.RC:$src1,
10555                                                   (From.LdFrag addr:$src2),
10556                                           timm:$src3))),
10557                                 To.ImmAllZerosV)),
10558            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10559                                                   To.RC:$src1, addr:$src2,
10560                                                   (ImmXForm timm:$src3))>;
10561}
10562
// Extends avx512_vpalign_mask_lowering with broadcast-load patterns
// (unmasked, merge-masked and zero-masked), for targets whose 'To' type has
// a broadcast form.
10563multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10564                                           X86VectorVTInfo From,
10565                                           X86VectorVTInfo To,
10566                                           SDNodeXForm ImmXForm> :
10567      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked, broadcast second operand.
10568  def : Pat<(From.VT (OpNode From.RC:$src1,
10569                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10570                             timm:$src3)),
10571            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10572                                                  (ImmXForm timm:$src3))>;
10573
  // Merge-masked, broadcast second operand.
10574  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10575                                 (bitconvert
10576                                  (From.VT (OpNode From.RC:$src1,
10577                                           (bitconvert
10578                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
10579                                           timm:$src3))),
10580                                 To.RC:$src0)),
10581            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10582                                                   To.RC:$src1, addr:$src2,
10583                                                   (ImmXForm timm:$src3))>;
10584
  // Zero-masked, broadcast second operand.
10585  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10586                                 (bitconvert
10587                                  (From.VT (OpNode From.RC:$src1,
10588                                           (bitconvert
10589                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
10590                                           timm:$src3))),
10591                                 To.ImmAllZerosV)),
10592            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10593                                                    To.RC:$src1, addr:$src2,
10594                                                    (ImmXForm timm:$src3))>;
10595}
10596
10597let Predicates = [HasAVX512] in {
10598  // For 512-bit we lower to the widest element type we can. So we only need
10599  // to handle converting valignq to valignd.
10600  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10601                                         v16i32_info, ValignqImm32XForm>;
10601}
10602
10604let Predicates = [HasVLX] in {
10605  // For 128-bit we lower to the widest element type we can. So we only need
10606  // to handle converting valignq to valignd.
10607  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10608                                         v4i32x_info, ValignqImm32XForm>;
10609  // For 256-bit we lower to the widest element type we can. So we only need
10610  // to handle converting valignq to valignd.
10611  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10612                                         v8i32x_info, ValignqImm32XForm>;
10613}
10614
10615let Predicates = [HasVLX, HasBWI] in {
10616  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
10617  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10618                                      v16i8x_info, ValignqImm8XForm>;
10619  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10620                                      v16i8x_info, ValigndImm8XForm>;
10621}
10622
// VDBPSADBW: i16 result vector computed from i8 sources (mixed-type 3Op).
10623defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10624                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10625                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10626
// Unary op on a single vector source, register and full-width memory forms.
10627multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10628                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10629  let ExeDomain = _.ExeDomain in {
  // reg form.
10630  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10631                    (ins _.RC:$src1), OpcodeStr,
10632                    "$src1", "$src1",
10633                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10634                    Sched<[sched]>;
10635
  // mem form.
10636  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10637                  (ins _.MemOp:$src1), OpcodeStr,
10638                  "$src1", "$src1",
10639                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10640            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10641            Sched<[sched.Folded]>;
10642  }
10643}
10644
// Adds the broadcast-from-memory form to avx512_unary_rm.
10645multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10646                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10647           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10648  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10649                  (ins _.ScalarMemOp:$src1), OpcodeStr,
10650                  "${src1}"#_.BroadcastStr,
10651                  "${src1}"#_.BroadcastStr,
10652                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10653             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10654             Sched<[sched.Folded]>;
10655}
10656
// Instantiates avx512_unary_rm at all three widths; 128/256-bit also need
// HasVLX.
10657multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10658                              X86SchedWriteWidths sched,
10659                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10660  let Predicates = [prd] in
10661    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10662                             EVEX_V512;
10663
10664  let Predicates = [prd, HasVLX] in {
10665    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10666                              EVEX_V256;
10667    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10668                              EVEX_V128;
10669  }
10670}
10671
// Instantiates avx512_unary_rmb (incl. broadcast form) at all three widths;
// 128/256-bit also need HasVLX.
10672multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10673                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10674                               Predicate prd> {
10675  let Predicates = [prd] in
10676    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10677                              EVEX_V512;
10678
10679  let Predicates = [prd, HasVLX] in {
10680    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10681                                 EVEX_V256;
10682    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10683                                 EVEX_V128;
10684  }
10685}
10686
// D (i32) and Q (i64) flavors of a unary op; both support broadcast, Q gets
// VEX.W.
10687multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10688                                 SDNode OpNode, X86SchedWriteWidths sched,
10689                                 Predicate prd> {
10690  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10691                               avx512vl_i64_info, prd>, VEX_W;
10692  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10693                               avx512vl_i32_info, prd>;
10694}
10695
// B (i8) and W (i16) flavors of a unary op; no broadcast form for byte/word
// elements.
10696multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10697                                 SDNode OpNode, X86SchedWriteWidths sched,
10698                                 Predicate prd> {
10699  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10700                              avx512vl_i16_info, prd>, VEX_WIG;
10701  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10702                              avx512vl_i8_info, prd>, VEX_WIG;
10703}
10704
// All four element-size flavors: D/Q under HasAVX512, B/W under HasBWI.
10705multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10706                                  bits<8> opc_d, bits<8> opc_q,
10707                                  string OpcodeStr, SDNode OpNode,
10708                                  X86SchedWriteWidths sched> {
10709  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10710                                    HasAVX512>,
10711              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10712                                    HasBWI>;
10713}
10714
10715defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10716                                    SchedWriteVecALU>;
10717
10718// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the operand into a ZMM register, run the 512-bit VPABSQ, then
// extract the low 256/128 bits of the result.
10719let Predicates = [HasAVX512, NoVLX] in {
10720  def : Pat<(v4i64 (abs VR256X:$src)),
10721            (EXTRACT_SUBREG
10722                (VPABSQZrr
10723                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10724             sub_ymm)>;
10725  def : Pat<(v2i64 (abs VR128X:$src)),
10726            (EXTRACT_SUBREG
10727                (VPABSQZrr
10728                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10729             sub_xmm)>;
10730}
10731
10732// Use 512bit version to implement 128/256 bit.
// Generic NoVLX lowering: widen into ZMM, execute the instruction's Z
// (512-bit) form, extract the original-width subregister.
10733multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10734                                 AVX512VLVectorVTInfo _, Predicate prd> {
10735  let Predicates = [prd, NoVLX] in {
    // 256-bit operand via the 512-bit instruction.
10736    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10737              (EXTRACT_SUBREG
10738                (!cast<Instruction>(InstrStr # "Zrr")
10739                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10740                                 _.info256.RC:$src1,
10741                                 _.info256.SubRegIdx)),
10742              _.info256.SubRegIdx)>;
10743
    // 128-bit operand via the 512-bit instruction.
10744    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10745              (EXTRACT_SUBREG
10746                (!cast<Instruction>(InstrStr # "Zrr")
10747                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10748                                 _.info128.RC:$src1,
10749                                 _.info128.SubRegIdx)),
10750              _.info128.SubRegIdx)>;
10751  }
10752}
10753
// Count leading zeros per element (CDI): VPLZCNTD/VPLZCNTQ.
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// Detect duplicate elements within a vector (CDI): VPCONFLICTD/VPCONFLICTQ.
// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10775
10776//===---------------------------------------------------------------------===//
10777// Replicate Single FP - MOVSHDUP and MOVSLDUP
10778//===---------------------------------------------------------------------===//
10779
10780multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10781                            X86SchedWriteWidths sched> {
10782  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10783                                      avx512vl_f32_info, HasAVX512>, XS;
10784}
10785
10786defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10787                                  SchedWriteFShuffle>;
10788defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10789                                  SchedWriteFShuffle>;
10790
10791//===----------------------------------------------------------------------===//
10792// AVX-512 - MOVDDUP
10793//===----------------------------------------------------------------------===//
10794
// 128-bit MOVDDUP is special: both result elements come from the low f64,
// so the register form is modeled as an X86VBroadcast and the memory form
// loads only a scalar (ScalarMemOp with CD8VH half-width disp8 scaling)
// rather than a full 128-bit vector.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}
10809
// Instantiate MOVDDUP at all vector widths. The 256/512-bit forms are
// ordinary in-lane shuffles; the 128-bit form uses the scalar-load variant
// defined above.
// Fix: the OpNode parameter was declared but ignored — the body hard-coded
// X86Movddup. It now honors the parameter; behavior is unchanged because the
// sole instantiation (VMOVDDUP below) passes X86Movddup.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10822
// Wrapper fixing the f64 type info and the XD/VEX.W encoding bits for
// MOVDDUP (matches the legacy SSE3 F2-prefixed encoding).
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                        avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10830
// Select the 128-bit VMOVDDUP register form for a broadcast of a scalar f64
// that is already in an XMM register, including the merge-masked (rrk) and
// zero-masked (rrkz) variants via vselect_mask.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}
10843
10844//===----------------------------------------------------------------------===//
10845// AVX-512 - Unpack Instructions
10846//===----------------------------------------------------------------------===//
10847
10848let Uses = []<Register>, mayRaiseFPException = 0 in {
10849defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
10850                                 SchedWriteFShuffleSizes, 0, 1>;
10851defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
10852                                 SchedWriteFShuffleSizes>;
10853}
10854
10855defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10856                                       SchedWriteShuffle, HasBWI>;
10857defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10858                                       SchedWriteShuffle, HasBWI>;
10859defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10860                                       SchedWriteShuffle, HasBWI>;
10861defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10862                                       SchedWriteShuffle, HasBWI>;
10863
10864defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10865                                       SchedWriteShuffle, HasAVX512>;
10866defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10867                                       SchedWriteShuffle, HasAVX512>;
10868defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10869                                        SchedWriteShuffle, HasAVX512>;
10870defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10871                                        SchedWriteShuffle, HasAVX512>;
10872
10873//===----------------------------------------------------------------------===//
10874// AVX-512 - Extract & Insert Integer Instructions
10875//===----------------------------------------------------------------------===//
10876
// Memory-destination form shared by VPEXTRB/VPEXTRW: extract an element to a
// GPR-sized value, truncate it to the element width, and store it.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
10886
// VPEXTRB: byte extract to a 32/64-bit GPR (opcode 0x14, 0F3A map via TAPD)
// plus the store form. Requires BWI.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
10899
// VPEXTRW: word extract. The primary register form uses the legacy 0xC5
// (0F map, PD) encoding; 0x15 in the 0F3A map is the alternate
// register-destination encoding, kept as a codegen-only _REV alias so the
// disassembler can still decode it. The store form uses opcode 0x15.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
10919
// VPEXTRD/VPEXTRQ: dword/qword extract straight to a GPR or memory via the
// generic extractelt node (no truncation needed since the element fills the
// destination). Both share opcode 0x16; VEX.W selects the qword form.
// Requires DQI.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                            RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10944
// Memory-source insert-element form shared by all VPINSR*: load a scalar via
// LdFrag and insert it at position $src3. immoperator is timm for the b/w
// forms (X86pinsrb/w take a target immediate) and imm for the d/q forms
// (generic insertelt).
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag,
                                            SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
10955
// VPINSRB/VPINSRW: insert a byte/word taken from the low bits of a GPR.
// Requires BWI.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}
10969
// VPINSRD/VPINSRQ: insert a full dword/qword GPR using the generic
// insertelt node. Requires DQI.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}

// VPINSRW keeps the legacy 0F-map PD encoding; VPINSRD and VPINSRQ share
// opcode 0x22 in the 0F3A map, distinguished by VEX.W.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10991
10992//===----------------------------------------------------------------------===//
10993// VSHUFPS - VSHUFPD Operations
10994//===----------------------------------------------------------------------===//
10995
10996multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10997                        AVX512VLVectorVTInfo VTInfo_FP>{
10998  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10999                                    SchedWriteFShuffle>,
11000                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11001                                    AVX512AIi8Base, EVEX_4V;
11002}
11003
11004defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11005defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11006
11007//===----------------------------------------------------------------------===//
11008// AVX-512 - Byte shift Left/Right
11009//===----------------------------------------------------------------------===//
11010
// Whole-register byte shift (VPSLLDQ/VPSRLDQ) at one vector width. The
// register and memory forms share an opcode; MRMr/MRMm carry the /7 or /3
// ModRM.reg opcode extension that selects left vs. right shift.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11027
// Instantiate the byte shift at 512-bit (prd alone) and at 128/256-bit
// (prd + VLX).
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
// Both use opcode 0x73; the MRM7r/m vs. MRM3r/m ModRM extension selects
// shift-left vs. shift-right.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11047
// PSADBW at one vector width. Note the distinct _dst/_src type infos: the
// instruction consumes i8 vectors but produces i64 accumulator lanes.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11068
// Instantiate PSADBW at all widths; each width pairs an i8 source type with
// the i64 destination type of matching register size.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

// Sum of absolute differences of packed unsigned bytes.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11085
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// A VPTERNLOG truth-table immediate stores the result for operand values
// (op0,op1,op2) in the bit at index (op0<<2 | op1<<1 | op2), so permuting
// the operands permutes the immediate's bit positions as encoded below.
// Operand numbering in these comments is 0-based.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
11147
// VPTERNLOG{D,Q}: three-operand bitwise logic selected by an 8-bit
// truth-table immediate. $src1 is tied to $dst, so the instruction only
// encodes $src2/$src3/$src4; the memory or broadcast operand is always
// operand 2. The extra Pat<>s below re-match commuted operand orders
// (for masking passthru position and for load/broadcast position) by
// swizzling the immediate with the VPTERNLOG*_imm8 transforms above.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 timm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (_.BroadcastLdFrag addr:$src3)),
                            (i8 timm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}
11293
// Instantiate VPTERNLOG at all vector widths; narrow forms need VLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

// D and Q variants differ only in masking granularity and VEX.W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
11311
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// (Truth table 0x0F has bits 0-3 set, i.e. result = 1 exactly when operand 0
// is 0, so the immediate computes ~src0 independent of operands 1 and 2.)
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
11328
// Without VLX only the 512-bit vpternlog exists, so select XMM/YMM vnot by
// inserting the source into an undef ZMM register, doing the ZMM
// VPTERNLOGQ, and extracting the low xmm/ymm subregister back out.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}
11388
// With VLX the 128/256-bit vpternlog forms are available, so vnot selects
// directly without widening.
let Predicates = [HasVLX] in {
  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
11408
11409//===----------------------------------------------------------------------===//
11410// AVX-512 - FixupImm
11411//===----------------------------------------------------------------------===//
11412
// Packed vfixupimm: reg-reg, full-vector memory, and broadcast-memory forms.
// $src1 is tied to $dst; TblVT is the integer table-operand type.  All forms
// read MXCSR and may raise FP exceptions.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Register-register form.
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (X86VFixupimm (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
                                      (TblVT.VT _.RC:$src3),
                                      (i32 timm:$src4))>, Sched<[sched]>;
    // Full-vector memory form for the table operand.
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast-memory form (EVEX.b set, scalar element splatted).
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                    (i32 timm:$src4))>,
                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}
11444
// As avx512_fixupimm_packed, plus a register form with {sae}
// (suppress-all-exceptions).  EVEX.b selects SAE for the register variant.
// The SAE form still reads MXCSR but is not marked mayRaiseFPException.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}
11461
// Scalar vfixupimm (ss/sd): register, {sae} register, and scalar-memory
// forms.  _src3VT is the integer vector type of the table operand; $src1 is
// tied to $dst.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // {sae} form: reads MXCSR but uses the exception-suppressing node.
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Scalar-memory form: the table element is loaded and placed in a vector.
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (X86VFixupimms (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (_src3VT.VT (scalar_to_vector
                                              (_src3VT.ScalarLdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
11495
// Instantiate packed vfixupimm at all widths.  Only the 512-bit form gets
// the {sae} variant; the 128/256-bit forms require VLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}
11512
// vfixupimmss/sd (opcode 0x55) and the packed ps/pd forms (opcode 0x54).
// VEX_W selects the 64-bit element encoding for the sd/pd variants.
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11523
11524// Patterns used to select SSE scalar fp arithmetic instructions from
11525// either:
11526//
11527// (1) a scalar fp operation followed by a blend
11528//
11529// The effect is that the backend no longer emits unnecessary vector
11530// insert instructions immediately after SSE scalar fp instructions
11531// like addss or mulss.
11532//
11533// For example, given the following code:
11534//   __m128 foo(__m128 A, __m128 B) {
11535//     A[0] += B[0];
11536//     return A;
11537//   }
11538//
11539// Previously we generated:
11540//   addss %xmm0, %xmm1
11541//   movss %xmm1, %xmm0
11542//
11543// We now generate:
11544//   addss %xmm1, %xmm0
11545//
11546// (2) a vector packed single/double fp operation followed by a vector insert
11547//
11548// The effect is that the backend converts the packed fp instruction
11549// followed by a vector insert into a single SSE scalar fp instruction.
11550//
11551// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
11556//
11557// Previously we generated:
11558//   addps %xmm0, %xmm1
11559//   movss %xmm1, %xmm0
11560//
11561// We now generate:
11562//   addss %xmm1, %xmm0
11563
11564// TODO: Some canonicalization in lowering would simplify the number of
11565// patterns we have to try to match.
// Patterns selecting the AVX512 scalar fp arithmetic instructions for a
// scalar op on element 0 that is merged back into the destination vector via
// movss/movsd, including the merge-masked and zero-masked forms.
//   Op:       plain scalar math node (e.g. any_fadd) for the unmasked forms.
//   MaskedOp: node used under X86selects_mask for the masked forms.
//   OpcPrefix/MoveNode/_/ZeroFP: instruction-name prefix, movss/movsd node,
//   128-bit vector type info, and the +0.0 leaf that identifies zero-masking.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op, merging with $src0 where the mask is
    // clear, with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      (_.ScalarLdFrag addr:$src2)),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted zero-masked scalar math op (selects +0.0 where the mask is
    // clear) with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
      (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
          VK1WM:$mask, _.VT:$src1,
          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                            (MaskedOp (_.EltVT
                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
      (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}
11627
// Instantiate the scalar math patterns for add/sub/mul/div in both f32 (ss)
// and f64 (sd) flavors.
defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11637
// As above, but for unary scalar ops (e.g. sqrt): a unary op on element 0 of
// $src merged into $dst via movss/movsd selects the Zr_Int instruction.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}
11646
// Instantiate the unary scalar patterns for sqrt (ss and sd).
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11649
11650//===----------------------------------------------------------------------===//
11651// AES instructions
11652//===----------------------------------------------------------------------===//
11653
// EVEX-encoded AES instructions.  The 128/256-bit forms require VLX+VAES;
// the 512-bit form requires AVX512F+VAES.  IntPrefix names the base
// intrinsic; "_256"/"_512" suffixes select the wider variants.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
    }
    let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
11671
// vaesenc/vaesenclast/vaesdec/vaesdeclast (opcodes 0xDC-0xDF).
defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11676
11677//===----------------------------------------------------------------------===//
11678// PCLMUL instructions - Carry less multiplication
11679//===----------------------------------------------------------------------===//
11680
// EVEX-encoded carry-less multiply.  512-bit form needs AVX512F+VPCLMULQDQ;
// 128/256-bit forms need VLX+VPCLMULQDQ.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Assembler aliases (e.g. the pclmul{lq,hq}{lq,hq}dq mnemonic forms).
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11698
11699//===----------------------------------------------------------------------===//
11700// VBMI2
11701//===----------------------------------------------------------------------===//
11702
// VBMI2 variable funnel-shift: register and full-vector-memory forms.
// $src1 is tied to $dst (it supplies bits that are shifted in).
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11721
// As VBMI2_shift_var_rm, plus the broadcast-memory form (EVEX.b) for the
// d/q element sizes.
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"#VTI.BroadcastStr#", $src2",
              "$src2, ${src3}"#VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11736
// Instantiate the non-broadcast variable-shift forms at all widths
// (used for the word-element variants, which have no broadcast form).
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}
11749
// Instantiate the broadcast-capable variable-shift forms at all widths
// (used for the dword/qword-element variants).
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// Variable funnel-shift for w/d/q element sizes.  The word form uses its own
// opcode (wOp); dword and qword share dqOp and are distinguished by VEX_W.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
11771
// Immediate funnel-shift for w/d/q element sizes; same opcode layout as the
// variable forms (word opcode separate, d/q share dqOp with VEX_W).
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
11782
// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress (byte/word element variants added by VBMI2).
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand (byte/word element variants added by VBMI2).
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11801
11802//===----------------------------------------------------------------------===//
11803// VNNI
11804//===----------------------------------------------------------------------===//
11805
// VNNI dot-product accumulate: register, memory, and broadcast-memory forms.
// $src1 is the accumulator, tied to $dst.  IsCommutable applies to the
// register form only (memory forms cannot commute the folded operand).
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11835
// Instantiate a VNNI op at all widths over i32 element types; the 128/256-bit
// forms additionally require VLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                           IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                           IsCommutable>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                           IsCommutable>, EVEX_V128;
  }
}
11848
// FIXME: Is there a better scheduler class for VPDP?
// The byte variants (vpdpbusd[s]) are not commutable; the word variants are.
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
11854
// Patterns to match VPDPWSSD from existing instructions/intrinsics:
// fold add(acc, pmaddwd(x, y)) — where the pmaddwd has a single use — into a
// single vpdpwssd, in both register and load forms.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}
11878
11879//===----------------------------------------------------------------------===//
11880// Bit Algorithms
11881//===----------------------------------------------------------------------===//
11882
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte/word population count (BITALG), plus the lowering helpers that select
// them for ctpop on narrower/unsupported widths.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11891
// Single-use variant of the X86Vpshufbitqmb node: matches only when the node
// has exactly one use.  Used by the masked patterns below so a shared result
// is not folded into a masked instruction.
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// VPSHUFBITQMB register (rr) and memory (rm) forms.  The destination is a
// mask register (VTI.KRC); AVX512_maskable_cmp also emits the write-masked
// variants, which use the single-use _su fragment for their patterns.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiate VPSHUFBITQMB at all three vector widths: the 512-bit form needs
// only BITALG, the 256/128-bit forms also need VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

// EVEX-encoded GF(2^8) byte multiply at the three vector widths.  Note the
// EVEX forms require HasBWI in addition to GFNI (byte-masking support).
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}

// VGF2P8MULB: Galois-field (GF(2^8)) multiply of packed bytes.
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;

// Adds the broadcast-memory+imm8 form (rmbi) on top of the plain reg/reg and
// reg/mem imm8 forms inherited from avx512_3Op_rm_imm8.  The broadcast
// element is a qword (X86VBroadcastld64 into BcstVTI), bitcast back to the
// byte-vector type for the operation.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiate the GF(2^8) affine-transform forms at all three widths; the
// broadcast type is the matching i64 vector info at each width.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

// GF(2^8) affine transforms: opcode 0xCF is the inverse-then-affine form
// (vgf2p8affineinvqb), 0xCE the plain affine form (vgf2p8affineqb).
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;


//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

// Memory-form-only definitions with empty pattern lists ([]): these are
// selected via intrinsics/assembly elsewhere, so mayLoad, MXCSR use, and
// mayRaiseFPException are declared explicitly rather than inferred from a
// pattern.  $src1 is tied to $dst (accumulator form).
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

// Like the 4FMAPS defs above: memory-form only, no ISel patterns ([]),
// selected via intrinsics/assembly; integer domain, $src1 tied to $dst.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

// Pseudo store/load of a mask-register pair (VK16PAIR) through an arbitrary
// memory operand; no patterns, so these are emitted/expanded by C++ code.
// NOTE(review): presumably used to spill/reload the pair results produced by
// VP2INTERSECT below — confirm against the expansion code.
let hasSideEffects = 0 in {
  let mayStore = 1, SchedRW = [WriteFStoreX] in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1, SchedRW = [WriteFLoadX] in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

// VP2INTERSECT writes a *pair* of mask registers (the KRPC register-pair
// class from X86VectorVTInfo), so these are plain I<> defs instead of the
// AVX512_maskable helpers: rr = reg/reg, rm = reg/mem,
// rmb = reg/broadcast-mem (EVEX_B).
multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX_4V, T8XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins  _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiate vp2intersect at all three widths (VLX needed below 512 bits),
// then define the dword and qword element variants (qword adds VEX_W).
multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;

// Binop whose source and destination vector types differ.  Used below for
// VCVTNE2PS2BF16 (two f32 source vectors, one i16-element result vector).
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

// VCVTNE2PS2BF16: convert two packed-f32 sources into one packed-bf16
// result (bf16 elements carried as i16).  Explicitly non-commutable (final
// argument 0): the two sources occupy different halves of the result.
let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_i16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;

// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle in {
  // The "ne" convert always rounds to nearest-even, independent of MXCSR,
  // and never raises FP exceptions — hence the explicit empty Uses list and
  // mayRaiseFPException = 0.
  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasBF16, HasVLX] in {
    let Uses = []<Register>, mayRaiseFPException = 0 in {
    // The 128-bit form passes null_frag: its selection patterns are written
    // manually (after this multiclass) so masking can use X86mcvtneps2bf16.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                               VK4WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
                               X86cvtneps2bf16, X86cvtneps2bf16,
                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
    }
  } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle

  // Assembly aliases disambiguating the 128/256-bit source forms with
  // explicit "x"/"y" mnemonic suffixes (both write a VR128X destination).
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0>;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                  f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0>;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                  f256mem:$src), 0, "intel">;
}

// VCVTNEPS2BF16: single-source packed f32 -> bf16 truncating convert.
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8XS,
                                       EVEX_CD8<32, CD8VF>;

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.  Three operand shapes are
  // covered — register, load, and broadcast-load — each with unmasked,
  // merge-masked (k) and zero-masked (kz) variants.
  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
                                     (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8i16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
}

// BF16 dot-product accumulate; $src1 is the accumulator, tied to $dst.
// r = reg/reg, m = reg/mem, mb = reg/broadcast-mem (EVEX_B).  Note the
// accumulator uses the f32 info `_` while the bf16 sources use `src_v`.
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins src_v.RC:$src2, src_v.RC:$src3),
                           OpcodeStr, "$src3, $src2", "$src2, $src3",
                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
                           EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
                               Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
                  OpcodeStr,
                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
                  !strconcat("$src2, ${src3}", _.BroadcastStr),
                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                  EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

}
} // Constraints = "$src1 = $dst"

// Instantiate the BF16 dot-product forms at all three vector widths; the
// destination/accumulator info is `_` (f32) and the source info is `src_v`.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                                   src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

// VDPBF16PS: dot product of bf16 pairs accumulated into packed f32.
let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                       avx512vl_f32_info, avx512vl_i32_info,
                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
