xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// Group template arguments that can be derived from the vector type (EltNum x
16// EltVT).  These are things like the register class for the writemask, etc.
17// The idea is to pass one of these as the template argument rather than the
18// individual arguments.
19// The template is also used for scalar types, in this case numelts is 1.
// Template parameters:
//   numelts - element count, stored in NumElts (1 for scalar types).
//   eltvt   - element value type, stored in EltVT.
//   rc      - register class, stored in RC.
//   suffix  - mnemonic suffix, stored in Suffix; defaults to "".
// Every other field below is derived from these four values, mostly by
// building a record name as a string and resolving it with !cast.
20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                      string suffix = ""> {
22  RegisterClass RC = rc;
23  ValueType EltVT = eltvt;
24  int NumElts = numelts;
25
26  // Corresponding mask register class.
27  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
28
29  // Corresponding mask register pair class.
  // Left unset (?) when NumElts is greater than 16.
30  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
32
33  // Corresponding write-mask register class.
34  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
35
36  // The mask VT.
37  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
38
39  // Suffix used in the instruction mnemonic.
40  string Suffix = suffix;
41
42  // VTName is a string name for vector VT. For vector types it will be
43  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44  // It is a little bit complex for scalar types, where NumElts = 1.
45  // In this case we build v4f32 or v2f64
  // More precisely, when NumElts is 1 the element count used in the name is
  // 8, 4 or 2 for an EltVT.Size of 16, 32 or 64 respectively; any other
  // element size falls back to NumElts itself.
46  string VTName = "v" # !if (!eq (NumElts, 1),
47                        !if (!eq (EltVT.Size, 16), 8,
48                        !if (!eq (EltVT.Size, 32), 4,
49                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
50
51  // The vector VT.
52  ValueType VT = !cast<ValueType>(VTName);
53
54  string EltTypeName = !cast<string>(EltVT);
55  // Size of the element type in bits, e.g. 32 for v16i32.
  // EltSizeName is the string form, obtained by deleting every "f" and "i"
  // from EltTypeName; EltSize is the integer read from EltVT.Size.
56  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
57  int EltSize = EltVT.Size;
58
59  // "i" for integer types and "f" for floating-point types
  // Computed by removing EltSizeName from EltTypeName.
60  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
61
62  // Size of RC in bits, e.g. 512 for VR512.
  // The value is read from VT.Size, i.e. from the vector VT selected through
  // VTName above, not from RC directly.
63  int Size = VT.Size;
64
65  // The corresponding memory operand, e.g. i512mem for VR512.
  // MemOp is looked up as TypeVariantName # Size # "mem"; ScalarMemOp as
  // EltVT # "mem".
66  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
67  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
68  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // f16 selects shmem; any element type other than f16/f32/f64 leaves the
  // field unset (?).
69  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
70                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
71                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));
72
73  // Load patterns
74  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
75
76  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
77
78  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
79  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
80
  // Scalar-intrinsic load fragments (sse_load_f16/f32/f64); unset (?) for any
  // other element type.
81  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
82                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
83                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));
84
85  // The string to specify embedded broadcast in assembly.
86  string BroadcastStr = "{1to" # NumElts # "}";
87
88  // 8-bit compressed displacement tuple/subvector format.  This is only
89  // defined for NumElts <= 8.
  // The guard below tests !srl(NumElts, 4) == 0, i.e. NumElts < 16; larger
  // element counts leave the field unset (?).
90  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
92
  // Sub-register index: sub_xmm for a 128-bit Size, sub_ymm for 256-bit,
  // unset (?) for every other Size.
93  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94                          !if (!eq (Size, 256), sub_ymm, ?));
95
  // Execution domain: f32 and f16 map to SSEPackedSingle, f64 to
  // SSEPackedDouble, and everything else to SSEPackedInt.
96  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
98                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
99                     SSEPackedInt)));
100
  // Scalar FP register class: FR32X for f32, FR16X for f16, and FR64X for
  // every other element type (integer element types included).
101  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
102                      !if (!eq (EltTypeName, "f16"), FR16X,
103                      FR64X));
104
  // All-zeros vector of type VT, used as the false operand of zero-masking
  // selects.
105  dag ImmAllZerosV = (VT immAllZerosV);
106
  // Instruction-name suffix by vector width: "Z128", "Z256", or "Z" for any
  // other Size.
107  string ZSuffix = !if (!eq (Size, 128), "Z128",
108                   !if (!eq (Size, 256), "Z256", "Z"));
109}
110
// Concrete X86VectorVTInfo records. Template arguments, in order: element
// count, element type, register class, mnemonic suffix.
// 512-bit vector types (RC = VR512).
111def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
112def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
113def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
114def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
115def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
116def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
117def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
118
119// "x" in v32i8x_info means RC = VR256X
120def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
121def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
122def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
123def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
124def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
125def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
126def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
127
// 128-bit vector types; for these records the "x" marks RC = VR128X.
128def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
129def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
130def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
131def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
132def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
133def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
134def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
135
136// We map scalar types to the smallest (128-bit) vector type
137// with the appropriate element type. This allows the same masking logic to
// be used.
// NumElts is 1 for all of these, so VT is resolved through the scalar branch
// of X86VectorVTInfo.VTName. The integer records pass general-purpose
// register classes (GR32/GR64) as RC; the FP records pass VR128X.
138def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
139def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
140def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
141def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
142def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
143
// Bundles the three X86VectorVTInfo records for one element type, one per
// vector width, so that a single template argument can carry all of them.
//   i512 / i256 / i128 - stored unchanged in info512 / info256 / info128.
144class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
145                           X86VectorVTInfo i128> {
146  X86VectorVTInfo info512 = i512;
147  X86VectorVTInfo info256 = i256;
148  X86VectorVTInfo info128 = i128;
149}
150
// One AVX512VLVectorVTInfo per element type. Each lists the 512-bit record
// first, followed by the 256-bit and 128-bit "x" records.
151def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
152                                             v16i8x_info>;
153def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
154                                             v8i16x_info>;
155def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
156                                             v4i32x_info>;
157def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
158                                             v2i64x_info>;
159def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
160                                             v8f16x_info>;
161def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
162                                             v4f32x_info>;
163def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
164                                             v2f64x_info>;
165
// Type information for a mask (vXi1) vector. Unlike X86VectorVTInfo nothing
// is derived here; the three template arguments are stored as given.
//   _krc   - mask register class, stored in KRC.
//   _krcwm - write-mask register class, stored in KRCWM.
//   _vt    - mask value type, stored in KVT.
166class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
167                       ValueType _vt> {
168  RegisterClass KRC = _krc;
169  RegisterClass KRCWM = _krcwm;
170  ValueType KVT = _vt;
171}
172
// X86KVectorVTInfo records for mask widths 1 through 64. Each pairs VK<N>
// with VK<N>WM and the v<N>i1 value type.
173def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
174def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
175def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
176def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
177def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
178def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
179def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
180
181// Used for matching masked operations. Ensures the operation part only has a
182// single use.
// Wraps vselect(mask, src1, src2). The match is accepted only when the C++
// predicate isProfitableToFormMaskedOp(N) returns true; that helper is
// defined outside this file.
183def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
184                           (vselect node:$mask, node:$src1, node:$src2), [{
185  return isProfitableToFormMaskedOp(N);
186}]>;
187
// Counterpart of vselect_mask for the X86selects node: same operand list and
// the same isProfitableToFormMaskedOp(N) predicate.
188def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
189                              (X86selects node:$mask, node:$src1, node:$src2), [{
190  return isProfitableToFormMaskedOp(N);
191}]>;
192
193// This multiclass generates the masking variants from the non-masking
194// variant.  It only provides the assembly pieces for the masking variants.
195// It assumes custom ISel patterns for masking which can be provided as
196// template arguments.
// Emits three instruction records per instantiation:
//   NAME    - unmasked form, using Ins and Pattern.
//   NAME#k  - merge-masking form (EVEX_K), using MaskingIns and
//             MaskingPattern.
//   NAME#kz - zero-masking form (EVEX_KZ), using ZeroMaskingIns and
//             ZeroMaskingPattern.
// Parameters:
//   O, F                    - opcode byte and instruction format.
//   Outs                    - output operand dag shared by all three forms.
//   OpcodeStr               - mnemonic placed before the operand string.
//   AttSrcAsm, IntelSrcAsm  - source-operand text for the two asm syntaxes.
//   MaskingConstraint       - constraint string applied to NAME#k only.
//   IsCommutable            - isCommutable for NAME.
//   IsKCommutable           - isCommutable for NAME#k.
//   IsKZCommutable          - isCommutable for NAME#kz; defaults to
//                             IsCommutable.
//   ClobberConstraint       - constraint string applied to all three forms.
197multiclass AVX512_maskable_custom<bits<8> O, Format F,
198                                  dag Outs,
199                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
200                                  string OpcodeStr,
201                                  string AttSrcAsm, string IntelSrcAsm,
202                                  list<dag> Pattern,
203                                  list<dag> MaskingPattern,
204                                  list<dag> ZeroMaskingPattern,
205                                  string MaskingConstraint = "",
206                                  bit IsCommutable = 0,
207                                  bit IsKCommutable = 0,
208                                  bit IsKZCommutable = IsCommutable,
209                                  string ClobberConstraint = ""> {
  // Unmasked form. The asm string carries both syntaxes in one
  // "{att|intel}" alternative.
210  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
211    def NAME: AVX512<O, F, Outs, Ins,
212                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
213                                     "$dst, "#IntelSrcAsm#"}",
214                       Pattern>;
215
216  // Prefer over VMOV*rrk Pat<>
217  let isCommutable = IsKCommutable in
218    def NAME#k: AVX512<O, F, Outs, MaskingIns,
219                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
220                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
221                       MaskingPattern>,
222              EVEX_K {
223      // In case of the 3src subclass this is overridden with a let.
      // Combines the two constraint strings: whichever one is non-empty is
      // used alone, and when both are non-empty they are joined as
      // "<ClobberConstraint>, <MaskingConstraint>".
224      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
225                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
226                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
227    }
228
229  // Zero mask does not add any restrictions to commute operands transformation.
230  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  // The flag actually applied is IsKZCommutable, whose default value is
  // IsCommutable. MaskingConstraint is not applied to this form.
231  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
232      Constraints = ClobberConstraint in
233    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
234                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
235                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
236                       ZeroMaskingPattern>,
237              EVEX_KZ;
238}
239
240
241// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three selection patterns and forwards everything else to
// AVX512_maskable_custom:
//   unmasked     - (set _.RC:$dst, RHS)
//   merge-masked - (set _.RC:$dst, MaskingRHS), supplied whole by the caller
//   zero-masked  - (set _.RC:$dst, (Select mask, RHS, all-zeros)), built here
// Select is the select operator used for the zero-masked pattern and defaults
// to vselect_mask. The remaining parameters have the same meaning and
// defaults as in AVX512_maskable_custom.
242multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
243                                  dag Outs,
244                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
245                                  string OpcodeStr,
246                                  string AttSrcAsm, string IntelSrcAsm,
247                                  dag RHS, dag MaskingRHS,
248                                  SDPatternOperator Select = vselect_mask,
249                                  string MaskingConstraint = "",
250                                  bit IsCommutable = 0,
251                                  bit IsKCommutable = 0,
252                                  bit IsKZCommutable = IsCommutable,
253                                  string ClobberConstraint = ""> :
254  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
255                         AttSrcAsm, IntelSrcAsm,
256                         [(set _.RC:$dst, RHS)],
257                         [(set _.RC:$dst, MaskingRHS)],
258                         [(set _.RC:$dst,
259                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
260                         MaskingConstraint, IsCommutable,
261                         IsKCommutable, IsKZCommutable, ClobberConstraint>;
262
263// This multiclass generates the unconditional/non-masking, the masking and
264// the zero-masking variant of the vector instruction.  In the masking case, the
265// preserved vector elements come from a new dummy input operand tied to $dst.
266// This version uses a separate dag for non-masking and masking.
// RHS is used only for the unmasked pattern; MaskRHS is used inside both the
// merge-masked and zero-masked selects, which always use vselect_mask.
// The masked operand lists are formed by prepending to Ins:
//   merge-masking: (ins _.RC:$src0, _.KRCWM:$mask), with "$src0 = $dst"
//   zero-masking:  (ins _.KRCWM:$mask)
// Note that ClobberConstraint precedes the commutable bits in this
// multiclass's own parameter list, unlike in AVX512_maskable.
267multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
268                           dag Outs, dag Ins, string OpcodeStr,
269                           string AttSrcAsm, string IntelSrcAsm,
270                           dag RHS, dag MaskRHS,
271                           string ClobberConstraint = "",
272                           bit IsCommutable = 0, bit IsKCommutable = 0,
273                           bit IsKZCommutable = IsCommutable> :
274   AVX512_maskable_custom<O, F, Outs, Ins,
275                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
276                          !con((ins _.KRCWM:$mask), Ins),
277                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
278                          [(set _.RC:$dst, RHS)],
279                          [(set _.RC:$dst,
280                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
281                          [(set _.RC:$dst,
282                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
283                          "$src0 = $dst", IsCommutable, IsKCommutable,
284                          IsKZCommutable, ClobberConstraint>;
285
286// This multiclass generates the unconditional/non-masking, the masking and
287// the zero-masking variant of the vector instruction.  In the masking case, the
288// preserved vector elements come from a new dummy input operand tied to $dst.
// The same RHS dag serves all three forms. The merge-masked pattern is
// (Select mask, RHS, _.RC:$src0), and $src0 is tied to $dst through the
// "$src0 = $dst" masking constraint. Masked operand lists are formed by
// prepending ($src0, $mask) or ($mask) to Ins. Select defaults to
// vselect_mask.
289multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
290                           dag Outs, dag Ins, string OpcodeStr,
291                           string AttSrcAsm, string IntelSrcAsm,
292                           dag RHS,
293                           bit IsCommutable = 0, bit IsKCommutable = 0,
294                           bit IsKZCommutable = IsCommutable,
295                           SDPatternOperator Select = vselect_mask,
296                           string ClobberConstraint = ""> :
297   AVX512_maskable_common<O, F, _, Outs, Ins,
298                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
299                          !con((ins _.KRCWM:$mask), Ins),
300                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
301                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
302                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
303                          IsKZCommutable, ClobberConstraint>;
304
305// This multiclass generates the unconditional/non-masking, the masking and
306// the zero-masking variant of the scalar instruction.
// Thin wrapper over AVX512_maskable that passes 0 for all three commutable
// bits and selects with X86selects_mask instead of the default vselect_mask.
307multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
308                           dag Outs, dag Ins, string OpcodeStr,
309                           string AttSrcAsm, string IntelSrcAsm,
310                           dag RHS> :
311   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
312                   RHS, 0, 0, 0, X86selects_mask>;
313
314// Similar to AVX512_maskable but in this case one of the source operands
315// ($src1) is already tied to $dst so we just use that for the preserved
316// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
317// $src1.
// Operand lists are NonTiedIns with $src1 (unmasked) or ($src1, $mask)
// (both masked forms) prepended; the merge- and zero-masking lists are
// identical. An empty MaskingConstraint is passed down. IsKZCommutable is not
// forwarded, so it takes AVX512_maskable_common's default (IsCommutable).
// When MaskOnly is set, (null_frag) is passed in place of RHS as the
// unmasked RHS argument.
// NOTE(review): AVX512_maskable_common also builds its zero-masking pattern
// from that same argument, so MaskOnly affects that pattern too - confirm
// this is intended.
318multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
319                                dag Outs, dag NonTiedIns, string OpcodeStr,
320                                string AttSrcAsm, string IntelSrcAsm,
321                                dag RHS,
322                                bit IsCommutable = 0,
323                                bit IsKCommutable = 0,
324                                SDPatternOperator Select = vselect_mask,
325                                bit MaskOnly = 0> :
326   AVX512_maskable_common<O, F, _, Outs,
327                          !con((ins _.RC:$src1), NonTiedIns),
328                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
329                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
330                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
331                          !if(MaskOnly, (null_frag), RHS),
332                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
333                          Select, "", IsCommutable, IsKCommutable>;
334
335// Similar to AVX512_maskable_3src but in this case the input VT for the tied
336// operand differs from the output VT. This requires a bitconvert on
337// the preserved vector going into the vselect.
338// NOTE: The unmasked pattern is disabled.
// OutVT describes the result (it is passed as the type info to
// AVX512_maskable_common); InVT describes the tied $src1 operand and also
// supplies the mask register class used in the operand lists and in the
// merge-masked pattern. (null_frag) is passed as the unmasked RHS, and the
// merge-masked pattern selects between RHS and (bitconvert InVT.RC:$src1)
// with vselect_mask.
339multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
340                                     X86VectorVTInfo InVT,
341                                     dag Outs, dag NonTiedIns, string OpcodeStr,
342                                     string AttSrcAsm, string IntelSrcAsm,
343                                     dag RHS, bit IsCommutable = 0> :
344   AVX512_maskable_common<O, F, OutVT, Outs,
345                          !con((ins InVT.RC:$src1), NonTiedIns),
346                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
347                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
348                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
349                          (vselect_mask InVT.KRCWM:$mask, RHS,
350                           (bitconvert InVT.RC:$src1)),
351                           vselect_mask, "", IsCommutable>;
352
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and fixes the select operator to X86selects_mask.
353multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
354                                     dag Outs, dag NonTiedIns, string OpcodeStr,
355                                     string AttSrcAsm, string IntelSrcAsm,
356                                     dag RHS,
357                                     bit IsCommutable = 0,
358                                     bit IsKCommutable = 0,
359                                     bit MaskOnly = 0> :
360   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
361                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
362                        X86selects_mask, MaskOnly>;
363
// Defines the unmasked, merge-masked and zero-masked instruction records but
// gives only the unmasked one a selection pattern (Pattern); both masked
// forms receive empty pattern lists. Operand lists and the "$src0 = $dst"
// constraint are built the same way as in AVX512_maskable.
364multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
365                                  dag Outs, dag Ins,
366                                  string OpcodeStr,
367                                  string AttSrcAsm, string IntelSrcAsm,
368                                  list<dag> Pattern> :
369   AVX512_maskable_custom<O, F, Outs, Ins,
370                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
371                          !con((ins _.KRCWM:$mask), Ins),
372                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
373                          "$src0 = $dst">;
374
// 3-source counterpart of AVX512_maskable_in_asm: $src1 is prepended to
// NonTiedIns for every form, only the unmasked form gets a selection pattern,
// and an empty MaskingConstraint is passed.
375multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
376                                       dag Outs, dag NonTiedIns,
377                                       string OpcodeStr,
378                                       string AttSrcAsm, string IntelSrcAsm,
379                                       list<dag> Pattern> :
380   AVX512_maskable_custom<O, F, Outs,
381                          !con((ins _.RC:$src1), NonTiedIns),
382                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
383                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
384                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
385                          "">;
386
387// Instruction with mask that puts result in mask register,
388// like "compare" and "vptest"
// Emits two records: NAME (unmasked) and NAME#k (masked, EVEX_K). There is
// no zero-masking form, and no Constraints are set. Both records share the
// single IsCommutable flag.
389multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
390                                  dag Outs,
391                                  dag Ins, dag MaskingIns,
392                                  string OpcodeStr,
393                                  string AttSrcAsm, string IntelSrcAsm,
394                                  list<dag> Pattern,
395                                  list<dag> MaskingPattern,
396                                  bit IsCommutable = 0> {
397    let isCommutable = IsCommutable in {
398    def NAME: AVX512<O, F, Outs, Ins,
399                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
400                                     "$dst, "#IntelSrcAsm#"}",
401                       Pattern>;
402
403    def NAME#k: AVX512<O, F, Outs, MaskingIns,
404                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
405                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
406                       MaskingPattern>, EVEX_K;
407    }
408}
409
// Wraps RHS and MaskingRHS into (set _.KRC:$dst, ...) patterns, so the
// destination is the mask register class of the given type info, and
// forwards to AVX512_maskable_custom_cmp.
410multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
411                                  dag Outs,
412                                  dag Ins, dag MaskingIns,
413                                  string OpcodeStr,
414                                  string AttSrcAsm, string IntelSrcAsm,
415                                  dag RHS, dag MaskingRHS,
416                                  bit IsCommutable = 0> :
417  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
418                         AttSrcAsm, IntelSrcAsm,
419                         [(set _.KRC:$dst, RHS)],
420                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
421
// Masked operands are Ins with (ins _.KRCWM:$mask) prepended. The unmasked
// pattern uses RHS; the masked pattern is (and mask, RHS_su), i.e. the
// caller-supplied RHS_su dag ANDed with the write mask.
422multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
423                           dag Outs, dag Ins, string OpcodeStr,
424                           string AttSrcAsm, string IntelSrcAsm,
425                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
426   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
427                          !con((ins _.KRCWM:$mask), Ins),
428                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
429                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
430
431// Used by conversion instructions.
// The caller supplies all three operand lists and all three right-hand
// sides; each RHS is wrapped in (set _.RC:$dst, ...). The masking constraint
// is fixed to "$src0 = $dst", and the commutable bits and ClobberConstraint
// keep AVX512_maskable_custom's defaults.
432multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
433                                  dag Outs,
434                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
435                                  string OpcodeStr,
436                                  string AttSrcAsm, string IntelSrcAsm,
437                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
438  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
439                         AttSrcAsm, IntelSrcAsm,
440                         [(set _.RC:$dst, RHS)],
441                         [(set _.RC:$dst, MaskingRHS)],
442                         [(set _.RC:$dst, ZeroMaskingRHS)],
443                         "$src0 = $dst">;
444
// 3-source helper with a separate dag for the masked forms: RHS is used for
// the unmasked pattern, while MaskingRHS is used inside both the merge-masked
// select (false operand _.RC:$src1) and the zero-masked select (false operand
// all-zeros). Both selects use vselect_mask. $src1 is prepended to
// NonTiedIns in every form and an empty MaskingConstraint is passed.
// IsCommutable and IsKCommutable have no defaults here and must be given.
445multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
446                               dag Outs, dag NonTiedIns, string OpcodeStr,
447                               string AttSrcAsm, string IntelSrcAsm,
448                               dag RHS, dag MaskingRHS, bit IsCommutable,
449                               bit IsKCommutable> :
450   AVX512_maskable_custom<O, F, Outs,
451                          !con((ins _.RC:$src1), NonTiedIns),
452                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
453                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
454                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
455                          [(set _.RC:$dst, RHS)],
456                          [(set _.RC:$dst,
457                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
458                          [(set _.RC:$dst,
459                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
460                          "", IsCommutable, IsKCommutable>;
461
462// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
463// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
464// swizzled by ExecutionDomainFix to pxor.
465// We set canFoldAsLoad because this can be converted to a constant-pool
466// load of an all-zeros value if folding it would be beneficial.
// Two operand-less pseudo instructions that write VR512:
//   AVX512_512_SET0       - matches (v16i32 immAllZerosV).
//   AVX512_512_SETALLONES - matches (v16i32 immAllOnesV).
// Both are rematerializable, as cheap as a move, foldable as a load, gated on
// HasAVX512 and scheduled as WriteZero.
467let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
468    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
469def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
470               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
471def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
472               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
473}
474
// Map the all-zeros constant of the remaining 512-bit value types to
// AVX512_512_SET0. v16i32 is matched by the pseudo's own pattern; v32f16 is
// handled separately under HasFP16.
475let Predicates = [HasAVX512] in {
476def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
477def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
478def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
479def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
480def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
481}
482
483// Alias instructions that allow VPTERNLOG to be used with a mask to create
484// a mix of all ones and all zeros elements. This is done this way to force
485// the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask operand and matches a vselect between
// an all-ones and an all-zeros vector:
//   AVX512_512_SEXT_MASK_32 - VK16WM mask, v16i32 result.
//   AVX512_512_SEXT_MASK_64 - VK8WM mask,  v8i64 result.
// Note that these patterns use plain vselect rather than vselect_mask.
486let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
487def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
488                                (ins VK16WM:$mask), "",
489                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
490                                                      (v16i32 immAllOnesV),
491                                                      (v16i32 immAllZerosV)))]>;
492def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
493                                (ins VK8WM:$mask), "",
494                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
495                                           (v8i64 immAllOnesV),
496                                           (v8i64 immAllZerosV)))]>;
497}
498
// 128-bit and 256-bit zeroing pseudos, carrying the same flags as
// AVX512_512_SET0. They write VR128X / VR256X and match the all-zeros v4i32 /
// v8i32 constants respectively.
499let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
500    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
501def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
502               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
503def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
504               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
505}
506
// Map the all-zeros constant of the remaining 128-bit value types to
// AVX512_128_SET0 and of the remaining 256-bit value types to
// AVX512_256_SET0. v4i32 and v8i32 are matched by the pseudos' own patterns.
507let Predicates = [HasAVX512] in {
508def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
509def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
510def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
511def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
512def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
513def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
514def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
515def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
516def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
517def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
518}
519
// All-zeros constants of the f16 vector types reuse the same zeroing pseudos
// but are gated on HasFP16 instead of HasAVX512.
520let Predicates = [HasFP16] in {
521def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
522def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
523def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
524}
525
526// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
527// This is expanded by ExpandPostRAPseudos.
// Scalar floating-point zero pseudos under HasAVX512:
//   AVX512_FsFLD0SS   - FR32X,  matches fp32imm0.
//   AVX512_FsFLD0SD   - FR64X,  matches fp64imm0.
//   AVX512_FsFLD0F128 - VR128X, matches fp128imm0.
528let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
529    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
530  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
531                          [(set FR32X:$dst, fp32imm0)]>;
532  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
533                          [(set FR64X:$dst, fp64imm0)]>;
534  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
535                            [(set VR128X:$dst, fp128imm0)]>;
536}
537
// Half-precision scalar zero pseudo: writes FR16X, matches fp16imm0, and is
// gated on HasFP16. It carries the same flags as the other FsFLD0 pseudos.
538let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
539    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
540  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
541                          [(set FR16X:$dst, fp16imm0)]>;
542}
543
544//===----------------------------------------------------------------------===//
545// AVX-512 - VECTOR INSERT
546//
547
548// Supports two different pattern operators for mask and unmasked ops. Allows
549// null_frag to be passed for one.
// Defines a register form (rr) and a memory form (rm) of a subvector insert
// through AVX512_maskable_split, so each gets unmasked, k and kz variants.
//   Opcode           - opcode byte.
//   From             - type info of the inserted subvector ($src2).
//   To               - type info of the destination vector ($src1 / $dst).
//   vinsert_insert   - operator used in the unmasked pattern.
//   vinsert_for_mask - operator used in the masked patterns.
//   sched            - scheduling class; rm uses sched.Folded and
//                      sched.ReadAfterFold.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts.
550multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
551                                  X86VectorVTInfo To,
552                                  SDPatternOperator vinsert_insert,
553                                  SDPatternOperator vinsert_for_mask,
554                                  X86FoldableSchedWrite sched> {
555  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register source: $src2 is a From.RC register.
556    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
557                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
558                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
559                   "$src3, $src2, $src1", "$src1, $src2, $src3",
560                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
561                                         (From.VT From.RC:$src2),
562                                         (iPTR imm)),
563                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
564                                           (From.VT From.RC:$src2),
565                                           (iPTR imm))>,
566                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory source: $src2 is loaded through From.LdFrag, and the compressed
    // displacement is described by From.EltSize and From.CD8TupleForm.
567    let mayLoad = 1 in
568    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
569                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
570                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
571                   "$src3, $src2, $src1", "$src1, $src2, $src3",
572                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
573                               (From.VT (From.LdFrag addr:$src2)),
574                               (iPTR imm)),
575                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
576                               (From.VT (From.LdFrag addr:$src2)),
577                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
578                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
579                   Sched<[sched.Folded, sched.ReadAfterFold]>;
580  }
581}
582
583// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper: instantiates vinsert_for_size_split with
// vinsert_insert supplied for both the unmasked and the masked operator.
584multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
585                            X86VectorVTInfo To,
586                            SDPatternOperator vinsert_insert,
587                            X86FoldableSchedWrite sched> :
588  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
589
// Maps a subvector insert on the From/To type pair onto an already defined
// instruction whose name starts with InstrStr. Two anonymous patterns are
// emitted under the predicate list p:
//   * InstrStr#"rr" - the inserted subvector lives in a register;
//   * InstrStr#"rm" - the inserted subvector is loaded through From.LdFrag.
// In both cases the instruction immediate is produced by applying
// INSERT_get_vinsert_imm to the matched insert node ($ins).
multiclass vinsert_for_size_lowering<string InstrStr,
                                     X86VectorVTInfo From, X86VectorVTInfo To,
                                     PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    // Register source.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT From.RC:$src2),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                          To.RC:$src1,
                          From.RC:$src2,
                          (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory source.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT (From.LdFrag addr:$src2)),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rm")
                          To.RC:$src1,
                          addr:$src2,
                          (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
609
// Instantiates the subvector-insert instruction definitions for one element
// family. The name of the enclosing defm (NAME) is prepended to each suffix
// below. EltVT32/EltVT64 are the 32-bit and 64-bit element types; Opcode128 is
// used by the forms inserting a 128-bit subvector (32x4, 64x2) and Opcode256
// by the forms inserting a 256-bit subvector (64x4, 32x8).
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 4 x 32-bit elements into a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
        : vinsert_for_size<Opcode128,
                           X86VectorVTInfo< 4, EltVT32, VR128X>,
                           X86VectorVTInfo< 8, EltVT32, VR256X>,
                           vinsert128_insert, sched>,
          EVEX_V256;
  }

  // 4 x 32-bit elements into a 512-bit vector.
  defm NAME # "32x4Z"
      : vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                         X86VectorVTInfo<16, EltVT32, VR512>,
                         vinsert128_insert, sched>,
        EVEX_V512;

  // 4 x 64-bit elements into a 512-bit vector.
  defm NAME # "64x4Z"
      : vinsert_for_size<Opcode256,
                         X86VectorVTInfo< 4, EltVT64, VR256X>,
                         X86VectorVTInfo< 8, EltVT64, VR512>,
                         vinsert256_insert, sched>,
        VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
          VEX_W1X, EVEX_V256;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
          VEX_W, EVEX_V512;

    defm NAME # "32x8Z"
        : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
          EVEX_V512;
  }
}
653
// Floating-point (VINSERTF*) and integer (VINSERTI*) subvector inserts.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18,
                                 f64, 0x1a,
                                 WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38,
                                 i64, 0x3a,
                                 WriteShuffle256>;
657
// Codegen patterns for the alternative element types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 128-bit subvector into a 256-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 128-bit subvector into a 512-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 256-bit subvector into a 512-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
674
// Codegen patterns for the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v8f16x_info, v16f16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasFP16, HasVLX]>;

// Codegen patterns for the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v8f16x_info, v32f16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasFP16]>;

// Codegen patterns for the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v16f16x_info, v32f16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasFP16]>;
696
697
// Patterns for a masked subvector insert where a bitconvert sits between the
// vselect_mask and the insert. The insert is performed on the From/To type
// pair, its result is bitconverted to Cast.VT, and the select is done with a
// Cast.KRCWM mask. Four anonymous patterns are emitted under predicates p,
// selecting the InstrStr # {rrk, rmk, rrkz, rmkz} instructions:
//   *k  - merge masking, the pass-through value is Cast.RC:$src0;
//   *kz - zero masking, the pass-through value is Cast.ImmAllZerosV.
multiclass vinsert_for_mask_cast<string InstrStr,
                                 X86VectorVTInfo From, X86VectorVTInfo To,
                                 X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
  let Predicates = p in {
    // Merge masking, register source.
    def : Pat<(Cast.VT
               (vselect_mask
                   Cast.KRCWM:$mask,
                   (bitconvert
                    (vinsert_insert:$ins (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm))),
                   Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr # "rrk")
                   Cast.RC:$src0, Cast.KRCWM:$mask,
                   To.RC:$src1, From.RC:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Merge masking, memory source.
    // NOTE(review): unlike the zero-masking memory pattern below, the load is
    // wrapped in an additional bitconvert here - confirm this is intentional.
    def : Pat<(Cast.VT
               (vselect_mask
                   Cast.KRCWM:$mask,
                   (bitconvert
                    (vinsert_insert:$ins (To.VT To.RC:$src1),
                                         (From.VT
                                          (bitconvert
                                           (From.LdFrag addr:$src2))),
                                         (iPTR imm))),
                   Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr # "rmk")
                   Cast.RC:$src0, Cast.KRCWM:$mask,
                   To.RC:$src1, addr:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero masking, register source.
    def : Pat<(Cast.VT
               (vselect_mask
                   Cast.KRCWM:$mask,
                   (bitconvert
                    (vinsert_insert:$ins (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm))),
                   Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr # "rrkz")
                   Cast.KRCWM:$mask,
                   To.RC:$src1, From.RC:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero masking, memory source.
    def : Pat<(Cast.VT
               (vselect_mask
                   Cast.KRCWM:$mask,
                   (bitconvert
                    (vinsert_insert:$ins (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm))),
                   Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr # "rmkz")
                   Cast.KRCWM:$mask,
                   To.RC:$src1, addr:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;
  }
}
749
// Masked 128-bit inserts into 256-bit vectors, floating-point types.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256",
                             v2f64x_info, v4f64x_info, v8f32x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256",
                             v4f32x_info, v8f32x_info, v4f64x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI, HasVLX]>;

// Masked 128-bit inserts into 256-bit vectors, integer types with the select
// performed on 32-bit elements.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256",
                             v2i64x_info, v4i64x_info, v8i32x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256",
                             v8i16x_info, v16i16x_info, v8i32x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256",
                             v16i8x_info, v32i8x_info, v8i32x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasVLX]>;
// Masked 128-bit inserts into 256-bit vectors, integer types with the select
// performed on 64-bit elements. These select the integer VINSERTI64x2Z256
// instruction (not the floating-point VINSERTF64x2Z256), in line with the
// 512-bit VINSERTI64x2Z entries for the same element types.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
775
// Masked 128-bit inserts into 512-bit vectors, floating-point types.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// Masked 128-bit inserts into 512-bit vectors, integer types.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
801
// Masked 256-bit inserts into 512-bit vectors, floating-point types.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// Masked 256-bit inserts into 512-bit vectors, integer types.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
827
// vinsertps - insert f32 to XMM.
// The "rr" form takes the inserted value from an XMM register; the "rm" form
// matches a scalar f32 load (via scalar_to_vector) as the inserted value.
let ExeDomain = SSEPackedSingle in {
  let isCommutable = 1 in {
    def VINSERTPSZrr
        : AVX512AIi8<0x21, MRMSrcReg,
                     (outs VR128X:$dst),
                     (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
                     "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set VR128X:$dst,
                           (X86insertps VR128X:$src1, VR128X:$src2,
                                        timm:$src3))]>,
          EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  }

  def VINSERTPSZrm
      : AVX512AIi8<0x21, MRMSrcMem,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
                   "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR128X:$dst,
                         (X86insertps
                              VR128X:$src1,
                              (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                              timm:$src3))]>,
        EVEX_4V, EVEX_CD8<32, CD8VT1>,
        Sched<[SchedWriteFShuffle.XMM.Folded,
               SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
845
846//===----------------------------------------------------------------------===//
847// AVX-512 VECTOR EXTRACT
848//---
849
// Defines the forms of a subvector extract from a From vector into a To
// vector:
//   rr  - register destination, built with AVX512_maskable_split;
//   mr  - store of the extracted subvector to memory;
//   mrk - store under a To.KRCWM write mask (no selection pattern).
// Separate pattern operators are taken for the unmasked (vextract_extract)
// and masked (vextract_for_mask) rr patterns; null_frag may be passed for one
// of them. SchedRR is the scheduling class of the rr form, SchedMR that of
// both store forms.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination.
    defm rr
        : AVX512_maskable_split<Opcode, MRMDestReg, To,
              (outs To.RC:$dst),
              (ins From.RC:$src1, u8imm:$idx),
              "vextract" # To.EltTypeName # "x" # To.NumElts,
              "$idx, $src1", "$src1, $idx",
              (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
              (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
          AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Unmasked store; selected from a store of the unmasked extract.
    def mr
        : AVX512AIi8<Opcode, MRMDestMem,
              (outs),
              (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
              "vextract" # To.EltTypeName # "x" # To.NumElts #
                  "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
              [(store (To.VT (vextract_extract:$idx
                                  (From.VT From.RC:$src1), (iPTR imm))),
                      addr:$dst)]>,
          EVEX, Sched<[SchedMR]>;

    // Masked store; it has no pattern, so mayStore is stated explicitly.
    let mayStore = 1, hasSideEffects = 0 in {
      def mrk
          : AVX512AIi8<Opcode, MRMDestMem,
                (outs),
                (ins To.MemOp:$dst, To.KRCWM:$mask,
                     From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst {${mask}}|"
                    "$dst {${mask}}, $src1, $idx}",
                []>,
            EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
    }
  }
}
886
// Convenience wrapper around vextract_for_size_split for the case where the
// unmasked and the masked forms are selected from one and the same pattern
// operator: vextract_extract is forwarded for both operator arguments.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              vextract_extract, vextract_extract,
                              SchedRR, SchedMR>;
893
// Codegen patterns for the alternative types.
// Maps a subvector extract on the From/To type pair onto an already defined
// instruction whose name starts with InstrStr. Under predicates p, one
// pattern selects InstrStr#"rr" for an extract into a register and one
// selects InstrStr#"mr" for an extract whose result is stored to memory. The
// instruction immediate is produced by applying EXTRACT_get_vextract_imm to
// the matched extract node ($ext).
multiclass vextract_for_size_lowering<string InstrStr,
                                      X86VectorVTInfo From, X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    // Extract into a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract folded into a store.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr # "mr")
                   addr:$dst,
                   From.RC:$src1,
                   (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
909
// Instantiates the subvector-extract instruction definitions for one element
// family. The name of the enclosing defm (NAME) is prepended to each suffix
// below. EltVT32/EltVT64 are the 32-bit and 64-bit element types; Opcode128 is
// used by the forms extracting a 128-bit subvector (32x4, 64x2) and Opcode256
// by the forms extracting a 256-bit subvector (64x4, 32x8). SchedRR/SchedMR
// are forwarded as the register-form and store-form scheduling classes.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    // 4 x 32-bit elements out of a 512-bit vector.
    defm NAME # "32x4Z"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT4>;

    // 4 x 64-bit elements out of a 512-bit vector.
    defm NAME # "64x4Z"
        : vextract_for_size<Opcode256,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            vextract256_extract, SchedRR, SchedMR>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 4 x 32-bit elements out of a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 8, EltVT64, VR512>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z"
        : vextract_for_size_split<Opcode256,
                                  X86VectorVTInfo<16, EltVT32, VR512>,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
                                  null_frag, vextract256_extract,
                                  SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
954
// Floating-point (VEXTRACTF*) and integer (VEXTRACTI*) subvector extracts.
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19,
                                   f64, 0x1b,
                                   WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39,
                                   i64, 0x3b,
                                   WriteShuffle256, WriteVecStore>;
958
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 128-bit subvector out of a 512-bit vector.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;

// 256-bit subvector out of a 512-bit vector.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;

// 128-bit subvector out of a 256-bit vector.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
975
// Codegen patterns for the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v16f16x_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasFP16, HasVLX]>;

// Codegen patterns for the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v32f16_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasFP16]>;

// Codegen patterns for the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v32f16_info, v16f16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasFP16]>;
998
999
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. Without VLX, each pattern below takes
// the sub_ymm subregister of the 512-bit source and extracts its upper
// 128 bits (index 1) with VEXTRACTI128rr / VEXTRACTF128rr.
let Predicates = [NoVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                        (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                        (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                        (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                        (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                        (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                        (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;
}
1028
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. With VLX, each pattern below takes the
// sub_ymm subregister of the 512-bit source and extracts its upper 128 bits
// (index 1) with VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                        (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                        (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                        (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                        (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                        (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;

  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                        (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;
}
1057
// v8f16 extract of elements [15:8] of a v32f16 vector: take the sub_ymm
// subregister and extract its upper 128 bits (index 1) with
// VEXTRACTF32x4Z256rr.
let Predicates = [HasFP16, HasVLX] in {
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF32x4Z256rr
                        (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                        (iPTR 1)))>;
}
1063
1064
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// The extract is performed on the From/To type pair, its result is
// bitconverted to Cast.VT, and the select is done with a Cast.KRCWM mask. Two
// anonymous patterns are emitted under predicates p:
//   * InstrStr#"rrk"  - merge masking, pass-through value is Cast.RC:$src0;
//   * InstrStr#"rrkz" - zero masking, pass-through value is Cast.ImmAllZerosV.
// The instruction immediate is produced by applying EXTRACT_get_vextract_imm
// to the matched extract node ($ext).
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // The pass-through operand belongs to the vselect, whose type is Cast.VT,
  // so it is described with Cast.RC (the same operand the output uses).
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1092
// Masked 128-bit extracts from 256-bit vectors, floating-point types.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// Masked 128-bit extracts from 256-bit vectors, integer types.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
1118
// Masked 128-bit extracts from 512-bit vectors, floating-point types.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// Masked 128-bit extracts from 512-bit vectors, integer types.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
1144
// Mask-cast extract instantiations taking a 256-bit piece out of a 512-bit
// source (vextract256_extract / EXTRACT_get_vextract256_imm).
// Here the predicate split is the reverse of the 128-bit case: the 32x8
// instructions are gated on HasDQI and the 64x4 instructions on HasAVX512.
// NOTE(review): the exact role of the three vector infos is set by
// vextract_for_mask_cast, which is not visible in this excerpt.
1145defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1146                              v8f32x_info, vextract256_extract,
1147                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1148defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1149                              v4f64x_info, vextract256_extract,
1150                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1151
1152defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1153                              v8i32x_info, vextract256_extract,
1154                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1155defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1156                              v8i32x_info, vextract256_extract,
1157                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1158defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1159                              v8i32x_info, vextract256_extract,
1160                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1161defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1162                              v4i64x_info, vextract256_extract,
1163                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1164defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1165                              v4i64x_info, vextract256_extract,
1166                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1167defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1168                              v4i64x_info, vextract256_extract,
1169                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1170
1171// vextractps - extract 32 bits from XMM
// Register-destination form (opcode 0x17, MRMDestReg, EVEX-encoded).
// Operands: $dst is a GR32orGR64 register, $src1 a VR128X register and $src2
// an 8-bit immediate. The selection pattern views $src1 as v4f32, bitcasts it
// to v4i32 and extracts the element selected by the immediate.
// Scheduled as WriteVecExtract.
1172def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1173      (ins VR128X:$src1, u8imm:$src2),
1174      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1175      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1176      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1177
// Memory-destination form of vextractps (opcode 0x17, MRMDestMem,
// EVEX-encoded). It has no register outputs: the pattern stores the element
// of (bc_v4i32 (v4f32 $src1)) selected by the immediate $src2 to the f32mem
// operand $dst. EVEX_CD8<32, CD8VT1> supplies the compressed-displacement
// parameters for the memory operand. Scheduled as WriteVecExtractSt.
1178def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1179      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1180      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1181      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1182                          addr:$dst)]>,
1183      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1184
1185//===---------------------------------------------------------------------===//
1186// AVX-512 BROADCAST
1187//---
1188// broadcast with a scalar argument.
// Selection patterns only (this multiclass defines no instructions): map an
// X86VBroadcast of a scalar held in SrcInfo.FRC onto the register forms of the
// instruction named Name # DestInfo.ZSuffix. In every pattern the scalar is
// first moved into SrcInfo.RC with COPY_TO_REGCLASS.
//   Name     - instruction name prefix used in the !cast<Instruction> lookups.
//   DestInfo - vector info of the broadcast result; also supplies the
//              write-mask class (KRCWM) and the all-zeros value.
//   SrcInfo  - vector info supplying FRC, RC and VT for the source operand.
1189multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1190                                   X86VectorVTInfo SrcInfo> {
      // Unmasked broadcast -> "rr" form.
1191  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1192            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1193             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
      // Merge-masked broadcast ($src0 is the pass-through value) -> "rrk" form.
1194  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1195                                       (X86VBroadcast SrcInfo.FRC:$src),
1196                                       DestInfo.RC:$src0)),
1197            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1198             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1199             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
      // Zero-masked broadcast (false operand is all zeros) -> "rrkz" form.
1200  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1201                                       (X86VBroadcast SrcInfo.FRC:$src),
1202                                       DestInfo.ImmAllZerosV)),
1203            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1204             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1205}
1206
1207// Split version to allow mask and broadcast node to be different types. This
1208// helps support the 32x2 broadcasts.
// Defines the six forms of a scalar-to-vector broadcast instruction:
//   rr / rrkz / rrk  - register source: unmasked, zero-masked, merge-masked;
//   rm / rmkz / rmk  - memory source:   unmasked, zero-masked, merge-masked.
// In every pattern the broadcast is produced as DestInfo.VT and bitconverted
// to MaskInfo.VT, which is the type the mask and the result register use.
//   opc, OpcodeStr  - opcode byte and assembly mnemonic.
//   SchedRR/SchedRM - scheduling classes for the register / memory forms.
//   MaskInfo        - vector info for the result register class and mask.
//   DestInfo        - vector info for the broadcast node's type and for the
//                     execution domain.
//   SrcInfo         - vector info for the source register, scalar memory
//                     operand and broadcast-load fragment.
//   IsConvertibleToThreeAddress - value given to that flag on the rmk form.
//   UnmaskedOp      - operator used only in the unmasked rr pattern.
//   UnmaskedBcastOp - operator used only in the unmasked rm pattern.
// The masked patterns always use X86VBroadcast / SrcInfo.BroadcastLdFrag,
// regardless of the two Unmasked* arguments.
1209multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1210                                     SchedWrite SchedRR, SchedWrite SchedRM,
1211                                     X86VectorVTInfo MaskInfo,
1212                                     X86VectorVTInfo DestInfo,
1213                                     X86VectorVTInfo SrcInfo,
1214                                     bit IsConvertibleToThreeAddress,
1215                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1216                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
      // Unmasked register form. The brace-less "let" applies to this def only.
      // hasSideEffects is set explicitly -- presumably because callers may
      // pass null_frag as UnmaskedOp, leaving no pattern to infer it from.
1217  let hasSideEffects = 0 in
1218  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1219                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1220                    [(set MaskInfo.RC:$dst,
1221                      (MaskInfo.VT
1222                       (bitconvert
1223                        (DestInfo.VT
1224                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1225                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
      // Zero-masked register form: masked-off elements select all zeros.
1226  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1227                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1228                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1229                       "${dst} {${mask}} {z}, $src}"),
1230                       [(set MaskInfo.RC:$dst,
1231                         (vselect_mask MaskInfo.KRCWM:$mask,
1232                          (MaskInfo.VT
1233                           (bitconvert
1234                            (DestInfo.VT
1235                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1236                          MaskInfo.ImmAllZerosV))],
1237                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
      // Merge-masked register form: $src0 is tied to $dst and supplies the
      // masked-off elements.
1238  let Constraints = "$src0 = $dst" in
1239  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1240                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1241                          SrcInfo.RC:$src),
1242                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1243                     "${dst} {${mask}}, $src}"),
1244                     [(set MaskInfo.RC:$dst,
1245                       (vselect_mask MaskInfo.KRCWM:$mask,
1246                        (MaskInfo.VT
1247                         (bitconvert
1248                          (DestInfo.VT
1249                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1250                        MaskInfo.RC:$src0))],
1251                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1252
      // Unmasked memory form; again the "let" covers only this def, and the
      // flags are spelled out (see the note on rr above).
1253  let hasSideEffects = 0, mayLoad = 1 in
1254  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1255                    (ins SrcInfo.ScalarMemOp:$src),
1256                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1257                    [(set MaskInfo.RC:$dst,
1258                      (MaskInfo.VT
1259                       (bitconvert
1260                        (DestInfo.VT
1261                         (UnmaskedBcastOp addr:$src)))))],
1262                    DestInfo.ExeDomain>, T8PD, EVEX,
1263                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1264
      // Zero-masked memory form.
1265  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1266                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1267                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1268                       "${dst} {${mask}} {z}, $src}"),
1269                       [(set MaskInfo.RC:$dst,
1270                         (vselect_mask MaskInfo.KRCWM:$mask,
1271                          (MaskInfo.VT
1272                           (bitconvert
1273                            (DestInfo.VT
1274                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1275                          MaskInfo.ImmAllZerosV))],
1276                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1277                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1278
      // Merge-masked memory form. This is the only def that receives the
      // IsConvertibleToThreeAddress argument.
1279  let Constraints = "$src0 = $dst",
1280      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1281  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1282                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1283                          SrcInfo.ScalarMemOp:$src),
1284                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1285                     "${dst} {${mask}}, $src}"),
1286                     [(set MaskInfo.RC:$dst,
1287                       (vselect_mask MaskInfo.KRCWM:$mask,
1288                        (MaskInfo.VT
1289                         (bitconvert
1290                          (DestInfo.VT
1291                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1292                        MaskInfo.RC:$src0))],
1293                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1294                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1295}
1296
1297// Helper class to force mask and broadcast result to same type.
// Thin wrapper over avx512_broadcast_rm_split: DestInfo is passed for both the
// MaskInfo and DestInfo parameters, and the two Unmasked* operators are left
// at their defaults. All other arguments are forwarded unchanged.
1298multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1299                               SchedWrite SchedRR, SchedWrite SchedRM,
1300                               X86VectorVTInfo DestInfo,
1301                               X86VectorVTInfo SrcInfo,
1302                               bit IsConvertibleToThreeAddress> :
1303  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1304                            DestInfo, DestInfo, SrcInfo,
1305                            IsConvertibleToThreeAddress>;
1306
// Scalar-double broadcast: instantiates the 512-bit (Z, HasAVX512) and 256-bit
// (Z256, HasVLX) variants only -- no Z128 variant is defined here.
// Each variant combines the instruction defs from avx512_broadcast_rm (source
// is always _.info128, IsConvertibleToThreeAddress = 1) with the FRC patterns
// from avx512_broadcast_scalar, which look the instructions up through NAME.
1307multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1308                                  AVX512VLVectorVTInfo _> {
1309  let Predicates = [HasAVX512] in {
1310    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1311                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1312              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1313              EVEX_V512;
1314  }
1315
1316  let Predicates = [HasVLX] in {
1317    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1318                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1319                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1320                 EVEX_V256;
1321  }
1322}
1323
// Scalar-single broadcast: same structure as avx512_fp_broadcast_sd, but also
// instantiates a 128-bit (Z128) variant under HasVLX.
// Every variant takes its source from _.info128 and passes 1 for
// IsConvertibleToThreeAddress.
// NOTE(review): Z128 uses the same WriteFShuffle256 / WriteFShuffle256Ld
// scheduling classes as the wider variants; confirm this is intentional.
1324multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1325                                  AVX512VLVectorVTInfo _> {
1326  let Predicates = [HasAVX512] in {
1327    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1328                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1329              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1330              EVEX_V512;
1331  }
1332
1333  let Predicates = [HasVLX] in {
1334    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1335                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1336                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1337                 EVEX_V256;
1338    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1339                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1340                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1341                 EVEX_V128;
1342  }
1343}
// Floating-point scalar broadcasts: vbroadcastss (opcode 0x18, f32 infos) and
// vbroadcastsd (opcode 0x19, f64 infos, additionally tagged VEX_W1X).
1344defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1345                                       avx512vl_f32_info>;
1346defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1347                                       avx512vl_f64_info>, VEX_W1X;
1348
// Broadcast from a register of class SrcRC into a vector: defines the "rr"
// family through AVX512_maskable, with mnemonic "vpbroadcast" # _.Suffix and
// pattern (_.VT (OpNode SrcRC:$src)).
//   opc     - opcode byte.
//   SchedRR - scheduling class for the instruction.
//   _       - vector info of the result.
//   OpNode  - broadcast operator used in the pattern.
//   SrcRC   - register class of the source operand.
// The three commutable flags are 0, and vselect is passed explicitly as the
// final AVX512_maskable argument (see the existing comment below for why).
1349multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1350                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1351                                    RegisterClass SrcRC> {
1352  // Fold with a mask even if it has multiple uses since it is cheap.
1353  let ExeDomain = _.ExeDomain in
1354  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1355                          (ins SrcRC:$src),
1356                          "vpbroadcast"#_.Suffix, "$src", "$src",
1357                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1358                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1359                          T8PD, EVEX, Sched<[SchedRR]>;
1360}
1361
// Byte/word register broadcast. The instruction itself is declared with a
// GR32 source operand and empty pattern lists (AVX512_maskable_custom), so all
// selection is done by the three explicit Pats that follow. Each Pat widens
// the SrcRC value to i32 by inserting it at sub-register index Subreg of an
// IMPLICIT_DEF before handing it to the instruction.
//   opc     - opcode byte.
//   Name    - full instruction name prefix used in the !cast lookups
//             (Name#rr, Name#rrk, Name#rrkz).
//   SchedRR - scheduling class.
//   _       - vector info of the result.
//   OpNode  - broadcast operator matched by the patterns.
//   SrcRC   - register class of the value being broadcast.
//   Subreg  - sub-register index at which SrcRC sits inside the GR32 operand.
1362multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1363                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1364                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1365  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1366  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1367                         (outs _.RC:$dst), (ins GR32:$src),
1368                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1369                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1370                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1371                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1372
      // Unmasked broadcast.
1373  def : Pat <(_.VT (OpNode SrcRC:$src)),
1374             (!cast<Instruction>(Name#rr)
1375              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1376
1377  // Fold with a mask even if it has multiple uses since it is cheap.
      // Merge-masked broadcast ($src0 is the pass-through value).
1378  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1379             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1380              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1381
      // Zero-masked broadcast.
1382  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1383             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1384              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1385}
1386
// Vector-length expansion of avx512_int_broadcastbw_reg.
// Z (512-bit) is gated on prd alone; Z256 and Z128 additionally need HasVLX.
// The Name passed down is extended with the same Z/Z256/Z128 suffix so that
// the inner !cast lookups resolve to the matching variant. Z128 is scheduled
// as WriteShuffle, the wider variants as WriteShuffle256.
1387multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1388                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1389                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1390  let Predicates = [prd] in
1391    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1392              OpNode, SrcRC, Subreg>, EVEX_V512;
1393  let Predicates = [prd, HasVLX] in {
1394    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1395              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1396    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1397              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1398  }
1399}
1400
// Vector-length expansion of avx512_int_broadcast_reg.
// Z (512-bit) is gated on prd alone; Z256 and Z128 additionally need HasVLX.
// Z128 is scheduled as WriteShuffle, the wider variants as WriteShuffle256.
1401multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1402                                       SDPatternOperator OpNode,
1403                                       RegisterClass SrcRC, Predicate prd> {
1404  let Predicates = [prd] in
1405    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1406                                      SrcRC>, EVEX_V512;
1407  let Predicates = [prd, HasVLX] in {
1408    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1409                                         SrcRC>, EVEX_V256;
1410    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1411                                         SrcRC>, EVEX_V128;
1412  }
1413}
1414
// Integer broadcasts from a general-purpose register.
//   B: opcode 0x7A, GR8  via sub_8bit,  HasBWI.
//   W: opcode 0x7B, GR16 via sub_16bit, HasBWI.
//   D: opcode 0x7C, GR32, HasAVX512.
//   Q: opcode 0x7C, GR64, HasAVX512, tagged VEX_W (D and Q share the opcode).
// The B/W forms pass their own defm name as the Name string used for the
// pattern lookups inside avx512_int_broadcastbw_reg.
1415defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1416                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1417defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1418                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1419                       HasBWI>;
1420defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1421                                                 X86VBroadcast, GR32, HasAVX512>;
1422defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1423                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1424
// Vector-length expansion of avx512_broadcast_rm for integer broadcasts whose
// source is a vector register or memory.
// Z (512-bit) is gated on prd alone; Z256 and Z128 additionally need HasVLX.
// The source info is _.info128 for every width. Z128 uses WriteShuffle /
// WriteShuffleXLd, the wider variants WriteShuffle256 / WriteShuffle256Ld.
// IsConvertibleToThreeAddress is forwarded unchanged to each variant.
1425multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1426                                      AVX512VLVectorVTInfo _, Predicate prd,
1427                                      bit IsConvertibleToThreeAddress> {
1428  let Predicates = [prd] in {
1429    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1430                                   WriteShuffle256Ld, _.info512, _.info128,
1431                                   IsConvertibleToThreeAddress>,
1432                                  EVEX_V512;
1433  }
1434  let Predicates = [prd, HasVLX] in {
1435    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1436                                    WriteShuffle256Ld, _.info256, _.info128,
1437                                    IsConvertibleToThreeAddress>,
1438                                 EVEX_V256;
1439    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1440                                    WriteShuffleXLd, _.info128, _.info128,
1441                                    IsConvertibleToThreeAddress>,
1442                                 EVEX_V128;
1443  }
1444}
1445
// Integer broadcasts from a vector register or memory.
//   vpbroadcastb: opcode 0x78, HasBWI,    IsConvertibleToThreeAddress = 0.
//   vpbroadcastw: opcode 0x79, HasBWI,    IsConvertibleToThreeAddress = 0.
//   vpbroadcastd: opcode 0x58, HasAVX512, IsConvertibleToThreeAddress = 1.
//   vpbroadcastq: opcode 0x59, HasAVX512, IsConvertibleToThreeAddress = 1,
//                 additionally tagged VEX_W1X.
1446defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1447                                           avx512vl_i8_info, HasBWI, 0>;
1448defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1449                                           avx512vl_i16_info, HasBWI, 0>;
1450defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1451                                           avx512vl_i32_info, HasAVX512, 1>;
1452defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1453                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1454
// Subvector broadcast from memory: defines only the "rm" family (via
// AVX512_maskable), with pattern (_Dst.VT (OpNode addr:$src)).
//   opc, OpcodeStr - opcode byte and assembly mnemonic.
//   OpNode         - subvector broadcast-load operator.
//   _Dst           - vector info of the result (register class, VT, mask).
//   _Src           - vector info supplying the memory operand (_Src.MemOp).
// Scheduled as SchedWriteShuffle.YMM.Folded for every instantiation.
1455multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1456                                      SDPatternOperator OpNode,
1457                                      X86VectorVTInfo _Dst,
1458                                      X86VectorVTInfo _Src> {
1459  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1460                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1461                           (_Dst.VT (OpNode addr:$src))>,
1462                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1463                           AVX5128IBase, EVEX;
1464}
1465
1466// This should be used for the AVX512DQ broadcast instructions. It disables
1467// the unmasked patterns so that we only use the DQ instructions when masking
1468//  is requested.
// Same operands and scheduling as avx512_subvec_broadcast_rm, but built on
// AVX512_maskable_split with null_frag as the first pattern argument and the
// real (OpNode addr:$src) pattern as the second.
// hasSideEffects and mayLoad are set explicitly on the defm.
// NOTE(review): presumably the first pattern is the unmasked one and the
// second the masked one -- confirm against AVX512_maskable_split.
1469multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1470                                         SDPatternOperator OpNode,
1471                                         X86VectorVTInfo _Dst,
1472                                         X86VectorVTInfo _Src> {
1473  let hasSideEffects = 0, mayLoad = 1 in
1474  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1475                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1476                           (null_frag),
1477                           (_Dst.VT (OpNode addr:$src))>,
1478                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1479                           AVX5128IBase, EVEX;
1480}
// With HasFP16: select 512-bit f16 broadcasts (v32f16) using the 16-bit
// integer broadcast instruction VPBROADCASTWZ.
1481let Predicates = [HasFP16] in {
      // Broadcast of a 16-bit load.
1482  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1483            (VPBROADCASTWZrm addr:$src)>;
1484
      // Broadcast from a v8f16 vector register.
1485  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1486            (VPBROADCASTWZrr VR128X:$src)>;
      // Broadcast of a scalar f16: copy FR16X into VR128X first.
1487  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1488            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1489}
// With HasVLX and HasFP16: the 128-bit (v8f16) and 256-bit (v16f16)
// counterparts of the f16 broadcast patterns, selecting VPBROADCASTWZ128 and
// VPBROADCASTWZ256 respectively.
1490let Predicates = [HasVLX, HasFP16] in {
      // Broadcast of a 16-bit load.
1491  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1492            (VPBROADCASTWZ128rm addr:$src)>;
1493  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1494            (VPBROADCASTWZ256rm addr:$src)>;
1495
      // Broadcast from a v8f16 vector register.
1496  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1497            (VPBROADCASTWZ128rr VR128X:$src)>;
1498  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1499            (VPBROADCASTWZ256rr VR128X:$src)>;
1500
      // Broadcast of a scalar f16: copy FR16X into VR128X first.
1501  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1502            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1503  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1504            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1505}
1506
1507//===----------------------------------------------------------------------===//
1508// AVX-512 BROADCAST SUBVECTORS
1509//
1510
// 512-bit subvector broadcasts from memory.
//   vbroadcasti32x4 (0x5a) / vbroadcastf32x4 (0x1a): 128-bit load
//     (X86SubVBroadcastld128), EVEX_CD8<32, CD8VT4>.
//   vbroadcasti64x4 (0x5b) / vbroadcastf64x4 (0x1b): 256-bit load
//     (X86SubVBroadcastld256), VEX_W, EVEX_CD8<64, CD8VT4>.
// These defms carry no Predicates wrapper of their own at this point.
1511defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1512                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1513                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1514defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1515                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1516                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1517defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1518                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
1519                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1520defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1521                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
1522                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1523
// Extra selection patterns for the 512-bit subvector broadcasts under
// HasAVX512. They cover result types other than the one each instruction was
// defined with, plus masked selects whose broadcast is seen through a bitcast.
1524let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector loads: floating-point result types use
// VBROADCASTF64X4rm, integer result types use VBROADCASTI64X4rm.
1525def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1526          (VBROADCASTF64X4rm addr:$src)>;
1527def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1528          (VBROADCASTF64X4rm addr:$src)>;
1529def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1530          (VBROADCASTF64X4rm addr:$src)>;
1531def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1532          (VBROADCASTI64X4rm addr:$src)>;
1533def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1534          (VBROADCASTI64X4rm addr:$src)>;
1535def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1536          (VBROADCASTI64X4rm addr:$src)>;
1537def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1538          (VBROADCASTI64X4rm addr:$src)>;
1539
// Unmasked 128-bit subvector loads: floating-point result types use
// VBROADCASTF32X4rm, integer result types use VBROADCASTI32X4rm.
1540def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1541          (VBROADCASTF32X4rm addr:$src)>;
1542def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1543          (VBROADCASTF32X4rm addr:$src)>;
1544def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1545          (VBROADCASTF32X4rm addr:$src)>;
1546def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1547          (VBROADCASTI32X4rm addr:$src)>;
1548def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1549          (VBROADCASTI32X4rm addr:$src)>;
1550def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1551          (VBROADCASTI32X4rm addr:$src)>;
1552def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1553          (VBROADCASTI32X4rm addr:$src)>;
1554
1555// Patterns for selects of bitcasted operations.
// 16-lane mask over a 128-bit broadcast that was typed with 64-bit elements:
// select the 32x4 instruction (rmkz for a zero false operand, rmk otherwise).
1556def : Pat<(vselect_mask VK16WM:$mask,
1557                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1558                        (v16f32 immAllZerosV)),
1559          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1560def : Pat<(vselect_mask VK16WM:$mask,
1561                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1562                        VR512:$src0),
1563          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1564def : Pat<(vselect_mask VK16WM:$mask,
1565                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1566                        (v16i32 immAllZerosV)),
1567          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1568def : Pat<(vselect_mask VK16WM:$mask,
1569                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1570                        VR512:$src0),
1571          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1572
// 8-lane mask over a 256-bit broadcast that was typed with 32-bit elements:
// select the 64x4 instruction.
1573def : Pat<(vselect_mask VK8WM:$mask,
1574                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1575                        (v8f64 immAllZerosV)),
1576          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1577def : Pat<(vselect_mask VK8WM:$mask,
1578                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1579                        VR512:$src0),
1580          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1581def : Pat<(vselect_mask VK8WM:$mask,
1582                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1583                        (v8i64 immAllZerosV)),
1584          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1585def : Pat<(vselect_mask VK8WM:$mask,
1586                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1587                        VR512:$src0),
1588          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1589}
1590
// 256-bit subvector broadcasts under HasVLX: the instruction definitions for
// vbroadcasti32x4 / vbroadcastf32x4 with a 256-bit destination, followed by
// patterns for the remaining 256-bit result types and for masked selects seen
// through a bitcast.
1591let Predicates = [HasVLX] in {
1592defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1593                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1594                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1595defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1596                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1597                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1598
// Unmasked 128-bit subvector loads: floating-point result types use the F32X4
// instruction, integer result types use the I32X4 instruction.
1599def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1600          (VBROADCASTF32X4Z256rm addr:$src)>;
1601def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1602          (VBROADCASTF32X4Z256rm addr:$src)>;
1603def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1604          (VBROADCASTF32X4Z256rm addr:$src)>;
1605def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1606          (VBROADCASTI32X4Z256rm addr:$src)>;
1607def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1608          (VBROADCASTI32X4Z256rm addr:$src)>;
1609def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1610          (VBROADCASTI32X4Z256rm addr:$src)>;
1611def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1612          (VBROADCASTI32X4Z256rm addr:$src)>;
1613
1614// Patterns for selects of bitcasted operations.
// 8-lane mask over a 128-bit broadcast that was typed with 64-bit elements:
// select the 32x4 instruction (rmkz for a zero false operand, rmk otherwise).
1615def : Pat<(vselect_mask VK8WM:$mask,
1616                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1617                        (v8f32 immAllZerosV)),
1618          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1619def : Pat<(vselect_mask VK8WM:$mask,
1620                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1621                        VR256X:$src0),
1622          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1623def : Pat<(vselect_mask VK8WM:$mask,
1624                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1625                        (v8i32 immAllZerosV)),
1626          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1627def : Pat<(vselect_mask VK8WM:$mask,
1628                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1629                        VR256X:$src0),
1630          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1631}
1632
// 64x2 subvector broadcasts with a 256-bit destination, under HasVLX + HasDQI.
// Note that the record names end in Z128 even though the definitions use
// 256-bit destination infos (v4i64x_info / v4f64x_info) and EVEX_V256.
// They are built with avx512_subvec_broadcast_rm_dq, which passes null_frag
// for the first pattern slot.
1633let Predicates = [HasVLX, HasDQI] in {
1634defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1635                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1636                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1637defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1638                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1639                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1640
1641// Patterns for selects of bitcasted operations.
// 4-lane mask over a 128-bit broadcast that was typed with 32-bit elements:
// select the 64x2 instruction (rmkz for a zero false operand, rmk otherwise).
1642def : Pat<(vselect_mask VK4WM:$mask,
1643                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1644                        (v4f64 immAllZerosV)),
1645          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1646def : Pat<(vselect_mask VK4WM:$mask,
1647                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1648                        VR256X:$src0),
1649          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1650def : Pat<(vselect_mask VK4WM:$mask,
1651                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1652                        (v4i64 immAllZerosV)),
1653          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1654def : Pat<(vselect_mask VK4WM:$mask,
1655                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1656                        VR256X:$src0),
1657          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1658}
1659
// 512-bit subvector broadcasts that require HasDQI: the 64x2 forms (128-bit
// load) and the 32x8 forms (256-bit load). All four are built with
// avx512_subvec_broadcast_rm_dq, which passes null_frag for the first pattern
// slot. The masked-select patterns below handle broadcasts seen through a
// bitcast.
1660let Predicates = [HasDQI] in {
1661defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1662                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
1663                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1664defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1665                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1666                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1667defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1668                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
1669                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1670defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1671                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1672                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1673
1674// Patterns for selects of bitcasted operations.
// 16-lane mask over a 256-bit broadcast that was typed with 64-bit elements:
// select the 32x8 instruction (rmkz for a zero false operand, rmk otherwise).
1675def : Pat<(vselect_mask VK16WM:$mask,
1676                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1677                        (v16f32 immAllZerosV)),
1678          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1679def : Pat<(vselect_mask VK16WM:$mask,
1680                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1681                        VR512:$src0),
1682          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1683def : Pat<(vselect_mask VK16WM:$mask,
1684                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1685                        (v16i32 immAllZerosV)),
1686          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1687def : Pat<(vselect_mask VK16WM:$mask,
1688                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1689                        VR512:$src0),
1690          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1691
// 8-lane mask over a 128-bit broadcast that was typed with 32-bit elements:
// select the 64x2 instruction.
1692def : Pat<(vselect_mask VK8WM:$mask,
1693                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1694                        (v8f64 immAllZerosV)),
1695          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1696def : Pat<(vselect_mask VK8WM:$mask,
1697                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1698                        VR512:$src0),
1699          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1700def : Pat<(vselect_mask VK8WM:$mask,
1701                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1702                        (v8i64 immAllZerosV)),
1703          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1704def : Pat<(vselect_mask VK8WM:$mask,
1705                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1706                        VR512:$src0),
1707          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1708}
1709
// Defines the 512-bit (Z) and 256-bit (Z256) variants of a 32x2 broadcast by
// instantiating avx512_broadcast_rm_split.
//   opc       - opcode byte forwarded to every variant.
//   OpcodeStr - assembly mnemonic.
//   _Dst      - type-info family used for the destination (info512/info256).
//   _Src      - type-info family used for the source; _Src.info128 is passed
//               as the third type info for both widths.
// The trailing "0, null_frag, null_frag" arguments are forwarded unchanged to
// avx512_broadcast_rm_split (defined elsewhere).
// Z requires HasDQI; Z256 requires HasDQI and HasVLX.  No 128-bit variant is
// created here.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V256;
}
1724
// Extends avx512_common_broadcast_32x2 (which supplies Z and Z256) with a
// 128-bit Z128 variant.  Parameters have the same meaning as in the parent
// multiclass.  Z128 requires HasDQI and HasVLX and uses the
// WriteShuffle/WriteShuffleXLd scheduling classes instead of the 256-bit ones.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V128;
}
1736
// The integer form uses avx512_common_broadcast_i32x2 and therefore gets
// Z, Z256 and Z128 variants; the floating-point form uses
// avx512_common_broadcast_32x2 and gets only Z and Z256.
defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;
1741
1742//===----------------------------------------------------------------------===//
1743// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1744//---
1745multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1746                                  X86VectorVTInfo _, RegisterClass KRC> {
1747  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1748                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1749                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1750                  EVEX, Sched<[WriteShuffle]>;
1751}
1752
// Instantiates avx512_mask_broadcastm for all three vector widths.
//   VTInfo - type-info family providing info512/info256/info128.
//   KRC    - mask register class used as the source for every width.
// Z requires HasCDI; Z256 and Z128 require HasCDI and HasVLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
1762
// Mask broadcasts: a VK16 source into 32-bit element vectors, and a VK8
// source into 64-bit element vectors (the latter tagged VEX_W).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               avx512vl_i64_info, VK8>, VEX_W;
1767
1768//===----------------------------------------------------------------------===//
1769// -- VPERMI2 - 3 source operands form --
1770multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1771                         X86FoldableSchedWrite sched,
1772                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1773let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1774    hasSideEffects = 0 in {
1775  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1776          (ins _.RC:$src2, _.RC:$src3),
1777          OpcodeStr, "$src3, $src2", "$src2, $src3",
1778          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1779          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1780
1781  let mayLoad = 1 in
1782  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1783            (ins _.RC:$src2, _.MemOp:$src3),
1784            OpcodeStr, "$src3, $src2", "$src2, $src3",
1785            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1786                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1787            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1788  }
1789}
1790
// Defines the broadcast-memory form ("rmb") that complements avx512_perm_i.
// Parameters match avx512_perm_i.  $src3 is a scalar memory operand loaded
// through _.BroadcastLdFrag, the assembly operand is suffixed with
// _.BroadcastStr, and the record is tagged EVEX_B.  $src1 is tied to $dst and
// is the index operand of X86VPermt2.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src2,
               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1805
// Instantiates avx512_perm_i together with avx512_perm_i_mb (rr, rm and rmb
// forms) for the 512-, 128- and 256-bit widths.
//   VTInfo      - type-info family of the data operands.
//   ShuffleMask - type-info family of the index operand.
// The 512-bit records are named NAME with no extra predicate applied here;
// the NAME#128 and NAME#256 records require HasVLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  }
}
1825
// Variant of avx512_perm_i_sizes that instantiates only avx512_perm_i (rr and
// rm forms, no broadcast-memory form) and takes the feature predicate as a
// parameter.
//   Idx - type-info family of the index operand.
//   Prd - predicate required by all widths; the 128- and 256-bit records
//         additionally require HasVLX.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>,  EVEX_V256;
  }
}
1841
// VPERMI2 instantiations.  The dword/qword and PS/PD forms use
// avx512_perm_i_sizes (with a broadcast-memory form); the word/byte forms use
// avx512_perm_i_sizes_bw with HasBWI / HasVBMI.  The floating-point forms use
// an integer type-info family of the same element width for the index
// operand.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1856
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
//   InstrStr - base instruction name; "rrk", "rmk" or "rmbk" is appended and
//              the result is looked up with !cast<Instruction>.
//   _        - type info of the data operands / result.
//   IdxVT    - type info of the index operand expected by X86VPermt2.
//   CastVT   - type through which $src1 is viewed before being bitcast.
// In every pattern the same register $src1 is used twice: bitcast from
// CastVT.VT to IdxVT.VT as the index, and bitcast from CastVT.VT to _.VT as
// the vselect_mask passthru.  The three patterns differ only in how $src3 is
// supplied: register, _.LdFrag load, or _.BroadcastLdFrag load.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                 (X86VPermt2 _.RC:$src2,
                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
                                             (_.BroadcastLdFrag addr:$src3)),
                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}
1886
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS family is covered, with the index viewed through the
// vXi64 type of the same total width.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1891
// VPERMT2
// Defines the register ("rr") and full-vector memory ("rm") forms through
// AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class; the memory form uses sched.Folded and
//               sched.ReadAfterFold.
//   _         - type info of the data operands and of the result.
//   IdxVT     - type info of the index operand ($src2).
// $src1 is tied to $dst and is the first data operand of X86VPermt2; the
// index is the explicit input IdxVT.RC:$src2.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Same operation with $src3 loaded through _.LdFrag.
  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (_.LdFrag addr:$src3))), 1>,
            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Defines the broadcast-memory form ("rmb") that complements avx512_perm_t.
// Parameters match avx512_perm_t.  $src3 is a scalar memory operand loaded
// through _.BroadcastLdFrag, the assembly operand is suffixed with
// _.BroadcastStr, and the record is tagged EVEX_B.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
              AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1924
// Instantiates avx512_perm_t together with avx512_perm_t_mb (rr, rm and rmb
// forms) for the 512-, 128- and 256-bit widths.
//   VTInfo      - type-info family of the data operands.
//   ShuffleMask - type-info family of the index operand.
// The 512-bit records are named NAME with no extra predicate applied here;
// the NAME#128 and NAME#256 records require HasVLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                              ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                              ShuffleMask.info256>, EVEX_V256;
  }
}
1944
// Variant of avx512_perm_t_sizes that instantiates only avx512_perm_t (rr and
// rm forms, no broadcast-memory form) and takes the feature predicate as a
// parameter.
//   Idx - type-info family of the index operand.
//   Prd - predicate required by all widths; the 128- and 256-bit records
//         additionally require HasVLX.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
1959
// VPERMT2 instantiations.  The dword/qword and PS/PD forms use
// avx512_perm_t_sizes (with a broadcast-memory form); the word/byte forms use
// avx512_perm_t_sizes_bw with HasBWI / HasVBMI.  The floating-point forms use
// an integer type-info family of the same element width for the index
// operand.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1974
1975//===----------------------------------------------------------------------===//
1976// AVX-512 - BLEND using mask
1977//
1978
1979multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1980                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1981  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1982  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1983             (ins _.RC:$src1, _.RC:$src2),
1984             !strconcat(OpcodeStr,
1985             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1986             EVEX_4V, Sched<[sched]>;
1987  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1988             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1989             !strconcat(OpcodeStr,
1990             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1991             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1992  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1993             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1994             !strconcat(OpcodeStr,
1995             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1996             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1997  let mayLoad = 1 in {
1998  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1999             (ins _.RC:$src1, _.MemOp:$src2),
2000             !strconcat(OpcodeStr,
2001             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
2002             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
2003             Sched<[sched.Folded, sched.ReadAfterFold]>;
2004  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2006             !strconcat(OpcodeStr,
2007             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2008             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2009             Sched<[sched.Folded, sched.ReadAfterFold]>;
2010  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2011             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2012             !strconcat(OpcodeStr,
2013             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2014             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2015             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2016  }
2017  }
2018}
// Defines the broadcast-memory forms of a mask blend: rmbk (write-masked,
// EVEX_K), rmbkz (zero-masked, EVEX_KZ, NotMemoryFoldable) and rmb (unmasked).
// All three take a scalar memory second source (_.ScalarMemOp), print it with
// the _.BroadcastStr suffix and are tagged EVEX_B.  Parameters match
// WriteFVarBlendask.  The pattern lists are empty, so mayLoad and
// hasSideEffects are set explicitly.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2047
// Instantiates both WriteFVarBlendask and WriteFVarBlendask_rmb (i.e. with
// broadcast-memory forms) for the 512-, 256- and 128-bit widths.
//   sched  - per-width scheduling classes (sched.ZMM/YMM/XMM).
//   VTInfo - type-info family providing info512/info256/info128.
// No predicate is applied to Z here; Z256 and Z128 require HasVLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                                 EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                      EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                      EVEX_V128;
  }
}
2063
// Instantiates WriteFVarBlendask only (no broadcast-memory forms) for the
// 512-, 256- and 128-bit widths.  Parameters match blendmask_dq.
// Z requires HasBWI; Z256 and Z128 require HasBWI and HasVLX.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
                               EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                                  EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                                  EVEX_V128;
  }
}
2077
// Mask-blend instantiations.  The 32/64-bit element forms use blendmask_dq;
// the byte/word forms use blendmask_bw.  Floating-point forms use
// SchedWriteFVarBlend, integer forms SchedWriteVarBlend.  The PD, Q and W
// forms are tagged VEX_W.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
2090
2091//===----------------------------------------------------------------------===//
2092// Compare Instructions
2093//===----------------------------------------------------------------------===//
2094
2095// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2096
2097multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2098                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2099                             X86FoldableSchedWrite sched> {
2100  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2101                      (outs _.KRC:$dst),
2102                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2103                      "vcmp"#_.Suffix,
2104                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2105                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2106                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2107                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2108  let mayLoad = 1 in
2109  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2110                    (outs _.KRC:$dst),
2111                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2112                    "vcmp"#_.Suffix,
2113                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2114                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2115                        timm:$cc),
2116                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2117                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2118                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2119
2120  let Uses = [MXCSR] in
2121  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2122                     (outs _.KRC:$dst),
2123                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2124                     "vcmp"#_.Suffix,
2125                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2126                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2127                                timm:$cc),
2128                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2129                                   timm:$cc)>,
2130                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2131
2132  let isCodeGenOnly = 1 in {
2133    let isCommutable = 1 in
2134    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2135                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2136                !strconcat("vcmp", _.Suffix,
2137                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2138                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2139                                          _.FRC:$src2,
2140                                          timm:$cc))]>,
2141                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2142    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2143              (outs _.KRC:$dst),
2144              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2145              !strconcat("vcmp", _.Suffix,
2146                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2147              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2148                                        (_.ScalarLdFrag addr:$src2),
2149                                        timm:$cc))]>,
2150              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2151              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2152  }
2153}
2154
// Single-use variants of X86cmpms and X86cmpmsSAE: each fragment matches the
// underlying node only when the predicate N->hasOneUse() holds.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
2163
// Scalar compare instantiations of avx512_cmp_scalar.  The f32 and f64 forms
// require HasAVX512; the f16 form requires HasFP16.  All three pass the same
// X86cmpms / X86cmpmsSAE nodes and their single-use fragments.
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2178
// Defines the packed integer compare-into-mask records rr, rm, rrk and rmk.
// The result is written to the mask register class _.KRC; the "k" forms take
// an extra _.KRCWM:$mask input and are tagged EVEX_K.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   sched        - scheduling class; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
//   _            - type info of the compared vectors.
//   IsCommutable - value assigned to isCommutable on the register forms
//                  (rr, rrk); the memory forms do not set it.
// All pattern lists are empty, so hasSideEffects/mayLoad are set explicitly.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
2205
// Extends avx512_icmp_packed (rr, rm, rrk, rmk) with the broadcast-memory
// forms rmb and rmbk.  Both take a scalar memory operand (_.ScalarMemOp),
// print it with the _.BroadcastStr suffix and are tagged EVEX_B; rmbk also
// takes a write mask and is tagged EVEX_K.  Parameters match
// avx512_icmp_packed.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               []>, EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2226
// Instantiates avx512_icmp_packed (no broadcast-memory forms) for the 512-,
// 256- and 128-bit widths.
//   sched        - per-width scheduling classes (sched.ZMM/YMM/XMM).
//   VTInfo       - type-info family providing info512/info256/info128.
//   prd          - predicate required by all widths; Z256 and Z128
//                  additionally require HasVLX.
//   IsCommutable - forwarded to avx512_icmp_packed; defaults to 0.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2242
// Instantiates avx512_icmp_packed_rmb (including the broadcast-memory forms)
// for the 512-, 256- and 128-bit widths.
//   sched        - per-width scheduling classes (sched.ZMM/YMM/XMM).
//   VTInfo       - type-info family providing info512/info256/info128.
//   prd          - predicate required by all widths; Z256 and Z128
//                  additionally require HasVLX.
//   IsCommutable - forwarded to avx512_icmp_packed_rmb; defaults to 0.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
2258
// X86setcc_commute is an SDNode over ISD::SETCC that is flagged
// SDNPCommutative, so patterns written with it may match with the two
// compared operands swapped.  This helps match loads in both operands for
// PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// X86pcmpgtm matches a (non-commutative) setcc whose condition code is SETGT.
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;
2264
2265// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2266// increase the pattern complexity the way an immediate would.
//
// Each defm below passes the opcode byte, the mnemonic, SchedWriteVecALU, the
// per-element-type VL info and the gating predicate. The byte/word forms use
// avx512_icmp_packed_vl with HasBWI; the dword/qword forms use
// avx512_icmp_packed_rmb_vl with HasAVX512. Only the EQ forms pass 1 for
// IsCommutable; the GT forms leave it at its default of 0.
2267let AddedComplexity = 2 in {
2268// FIXME: Is there a better scheduler class for VPCMP?
2269defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2270                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2271                EVEX_CD8<8, CD8VF>, VEX_WIG;
2272
2273defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2274                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2275                EVEX_CD8<16, CD8VF>, VEX_WIG;
2276
2277defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2278                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2279                EVEX_CD8<32, CD8VF>;
2280
2281defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2282                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2283                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2284
2285defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2286                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2287                EVEX_CD8<8, CD8VF>, VEX_WIG;
2288
2289defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2290                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2291                EVEX_CD8<16, CD8VF>, VEX_WIG;
2292
2293defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2294                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2295                EVEX_CD8<32, CD8VF>;
2296
2297defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2298                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2299                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2300}
2301
// Transform applied to a setcc node: reads the condition code from operand 2,
// maps it through X86::getVPCMPImmForCond and returns the result as an i8
// immediate.
2302def X86pcmpm_imm : SDNodeXForm<setcc, [{
2303  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2304  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2305  return getI8Imm(SSECC, SDLoc(N));
2306}]>;
2307
2308// Swapped operand version of X86pcmpm_imm: the condition code from operand 2
// is mapped through X86::getVPCMPImmForCond, then through
// X86::getSwappedVPCMPImm, and the result is returned as an i8 immediate.
2309def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2310  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2311  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2312  SSECC = X86::getSwappedVPCMPImm(SSECC);
2313  return getI8Imm(SSECC, SDLoc(N));
2314}]>;
2315
// Defines the "vpcmp<Suffix>" instructions that take the comparison as an
// u8imm operand ($cc), for one vector type `_`:
//   rri  - register, register; unmasked; pattern uses Frag
//   rmi  - register, full-vector load; unmasked; pattern uses Frag
//   rrik - register, register; pattern is (and $mask, Frag_su ...)
//   rmik - register, load; pattern is (and $mask, Frag_su ...)
// Only the two register/register forms are marked isCommutable.
// The two anonymous patterns at the end select rmi/rmik when the load is the
// FIRST compare operand; they use X86pcmpm_imm_commute to build the immediate.
// Name is the instruction-name prefix used by the !cast lookups
// (Name # _.ZSuffix # "rmi"/"rmik").
2316multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2317                          PatFrag Frag_su,
2318                          X86FoldableSchedWrite sched,
2319                          X86VectorVTInfo _, string Name> {
2320  let isCommutable = 1 in
2321  def rri : AVX512AIi8<opc, MRMSrcReg,
2322             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2323             !strconcat("vpcmp", Suffix,
2324                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2325             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2326                                                (_.VT _.RC:$src2),
2327                                                cond)))]>,
2328             EVEX_4V, Sched<[sched]>;
2329  def rmi : AVX512AIi8<opc, MRMSrcMem,
2330             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2331             !strconcat("vpcmp", Suffix,
2332                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2333             [(set _.KRC:$dst, (_.KVT
2334                                (Frag:$cc
2335                                 (_.VT _.RC:$src1),
2336                                 (_.VT (_.LdFrag addr:$src2)),
2337                                 cond)))]>,
2338             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2339  let isCommutable = 1 in
2340  def rrik : AVX512AIi8<opc, MRMSrcReg,
2341              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2342                                      u8imm:$cc),
2343              !strconcat("vpcmp", Suffix,
2344                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2345                         "$dst {${mask}}, $src1, $src2, $cc}"),
2346              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2347                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2348                                                         (_.VT _.RC:$src2),
2349                                                         cond))))]>,
2350              EVEX_4V, EVEX_K, Sched<[sched]>;
2351  def rmik : AVX512AIi8<opc, MRMSrcMem,
2352              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2353                                    u8imm:$cc),
2354              !strconcat("vpcmp", Suffix,
2355                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2356                         "$dst {${mask}}, $src1, $src2, $cc}"),
2357              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2358                                     (_.KVT
2359                                      (Frag_su:$cc
2360                                       (_.VT _.RC:$src1),
2361                                       (_.VT (_.LdFrag addr:$src2)),
2362                                       cond))))]>,
2363              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2364
  // Load in the first compare operand: emit the memory form with the register
  // as $src1 and the immediate produced by X86pcmpm_imm_commute.
2365  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2366                             (_.VT _.RC:$src1), cond)),
2367            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2368             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2369
2370  def : Pat<(and _.KRCWM:$mask,
2371                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2372                                     (_.VT _.RC:$src1), cond))),
2373            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2374             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2375             (X86pcmpm_imm_commute $cc))>;
2376}
2377
// Extends avx512_icmp_cc (inherited with the same arguments) with the
// broadcast-memory forms:
//   rmib  - register, broadcast load (_.BroadcastLdFrag); unmasked; EVEX_B
//   rmibk - same, with pattern (and $mask, Frag_su ...); EVEX_K, EVEX_B
// The asm strings append _.BroadcastStr to the ${src2} memory operand.
// The two anonymous patterns select rmib/rmibk when the broadcast load is the
// FIRST compare operand, using X86pcmpm_imm_commute to build the immediate.
2378multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2379                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2380                              X86VectorVTInfo _, string Name> :
2381           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2382  def rmib : AVX512AIi8<opc, MRMSrcMem,
2383             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2384                                     u8imm:$cc),
2385             !strconcat("vpcmp", Suffix,
2386                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2387                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2388             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2389                                       (_.VT _.RC:$src1),
2390                                       (_.BroadcastLdFrag addr:$src2),
2391                                       cond)))]>,
2392             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2393  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2394              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2395                                       _.ScalarMemOp:$src2, u8imm:$cc),
2396              !strconcat("vpcmp", Suffix,
2397                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2398                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2399              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2400                                     (_.KVT (Frag_su:$cc
2401                                             (_.VT _.RC:$src1),
2402                                             (_.BroadcastLdFrag addr:$src2),
2403                                             cond))))]>,
2404              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2405
  // Broadcast load in the first compare operand: emit the broadcast form with
  // the register as $src1 and the immediate from X86pcmpm_imm_commute.
2406  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2407                    (_.VT _.RC:$src1), cond)),
2408            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2409             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2410
2411  def : Pat<(and _.KRCWM:$mask,
2412                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2413                                     (_.VT _.RC:$src1), cond))),
2414            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2415             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2416             (X86pcmpm_imm_commute $cc))>;
2417}
2418
// Instantiates avx512_icmp_cc once per vector width of VTInfo:
//   Z    - info512, sched.ZMM, EVEX_V512, gated on [prd]
//   Z256 - info256, sched.YMM, EVEX_V256, gated on [prd, HasVLX]
//   Z128 - info128, sched.XMM, EVEX_V128, gated on [prd, HasVLX]
// NAME is passed as the Name argument so the inner !cast lookups can rebuild
// the full instruction names.
2419multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2420                             PatFrag Frag_su, X86SchedWriteWidths sched,
2421                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2422  let Predicates = [prd] in
2423  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2424                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2425
2426  let Predicates = [prd, HasVLX] in {
2427    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2428                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2429    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2430                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2431  }
2432}
2433
// Instantiates avx512_icmp_cc_rmb (the variant that also defines broadcast
// memory forms) once per vector width of VTInfo:
//   Z    - info512, sched.ZMM, EVEX_V512, gated on [prd]
//   Z256 - info256, sched.YMM, EVEX_V256, gated on [prd, HasVLX]
//   Z128 - info128, sched.XMM, EVEX_V128, gated on [prd, HasVLX]
// NAME is passed as the Name argument for the inner !cast lookups.
2434multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2435                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2436                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2437  let Predicates = [prd] in
2438  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2439                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2440
2441  let Predicates = [prd, HasVLX] in {
2442    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2443                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2444    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2445                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2446  }
2447}
2448
// Matches a setcc whose condition code (operand 2) is NOT an unsigned integer
// comparison according to ISD::isUnsignedIntSetCC. X86pcmpm_imm is attached as
// the operand transform.
2449def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2450                       (setcc node:$src1, node:$src2, node:$cc), [{
2451  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2452  return !ISD::isUnsignedIntSetCC(CC);
2453}], X86pcmpm_imm>;
2454
// Single-use variant of the non-unsigned setcc match: the node must have
// exactly one use (N->hasOneUse()) and its condition code must NOT be an
// unsigned integer comparison. X86pcmpm_imm is attached as the operand
// transform.
2455def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2456                          (setcc node:$src1, node:$src2, node:$cc), [{
2457  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2458  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2459}], X86pcmpm_imm>;
2460
// Matches a setcc whose condition code (operand 2) IS an unsigned integer
// comparison according to ISD::isUnsignedIntSetCC. X86pcmpm_imm is attached as
// the operand transform.
2461def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2462                        (setcc node:$src1, node:$src2, node:$cc), [{
2463  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2464  return ISD::isUnsignedIntSetCC(CC);
2465}], X86pcmpm_imm>;
2466
// Single-use variant of the unsigned setcc match: the node must have exactly
// one use (N->hasOneUse()) and its condition code must be an unsigned integer
// comparison. X86pcmpm_imm is attached as the operand transform.
2467def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2468                           (setcc node:$src1, node:$src2, node:$cc), [{
2469  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2470  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2471}], X86pcmpm_imm>;
2472
2473// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Instantiations of the immediate-condition compares. The signed mnemonics
// (b/w/d/q) use the X86pcmpm / X86pcmpm_su fragments; the unsigned ones
// (ub/uw/ud/uq) use X86pcmpum / X86pcmpum_su. Byte and word forms go through
// avx512_icmp_cc_vl with HasBWI; dword and qword forms go through
// avx512_icmp_cc_rmb_vl with HasAVX512. The word and qword forms add VEX_W.
2474defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2475                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2476                                EVEX_CD8<8, CD8VF>;
2477defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2478                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2479                                 EVEX_CD8<8, CD8VF>;
2480
2481defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2482                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2483                                VEX_W, EVEX_CD8<16, CD8VF>;
2484defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2485                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2486                                 VEX_W, EVEX_CD8<16, CD8VF>;
2487
2488defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2489                                    SchedWriteVecALU, avx512vl_i32_info,
2490                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2491defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2492                                     SchedWriteVecALU, avx512vl_i32_info,
2493                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2494
2495defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2496                                    SchedWriteVecALU, avx512vl_i64_info,
2497                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2498defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2499                                     SchedWriteVecALU, avx512vl_i64_info,
2500                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2501
// Single-use variant of X86cmpm: matches only when the node has exactly one
// use (N->hasOneUse()).
2502def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2503                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
2504  return N->hasOneUse();
2505}]>;
2506
// Transform for a timm comparison immediate used when the compare operands
// are swapped: keeps the low five bits of the immediate (& 0x1f), maps them
// through X86::getSwappedVCMPImm and returns the result as an i8 immediate.
2507def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2508  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2509  return getI8Imm(Imm, SDLoc(N));
2510}]>;
2511
// Defines the packed FP compare-into-mask instructions ("vcmp" # _.Suffix,
// opcode 0xC2) for one vector type `_`, plus the selection patterns that
// target them:
//   rri  - register, register
//   rmi  - register, full-vector load
//   rmbi - register, broadcast load (EVEX_B)
// Each defm goes through AVX512_maskable_cmp with an X86any_cmpm pattern and
// an X86cmpm_su pattern. The instructions are declared with Uses = [MXCSR]
// and mayRaiseFPException = 1.
// The anonymous patterns that follow cover: a load or broadcast load in the
// first operand (immediate rewritten by X86cmpm_imm_commute), the X86cmpmm
// mask-intrinsic node with an all-ones or explicit mask, and X86cmpmm with the
// load in the first operand. Name is the instruction-name prefix used by the
// !cast lookups.
2512multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2513                              string Name> {
2514let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // NOTE(review): the trailing `1` is presumably the commutable flag of
  // AVX512_maskable_cmp - confirm against its definition.
2515  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2516                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2517                   "vcmp"#_.Suffix,
2518                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2519                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2520                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2521                   1>, Sched<[sched]>;
2522
2523  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2524                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2525                "vcmp"#_.Suffix,
2526                "$cc, $src2, $src1", "$src1, $src2, $cc",
2527                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2528                             timm:$cc),
2529                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2530                            timm:$cc)>,
2531                Sched<[sched.Folded, sched.ReadAfterFold]>;
2532
2533  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2534                (outs _.KRC:$dst),
2535                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2536                "vcmp"#_.Suffix,
2537                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2538                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2539                (X86any_cmpm (_.VT _.RC:$src1),
2540                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2541                             timm:$cc),
2542                (X86cmpm_su (_.VT _.RC:$src1),
2543                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2544                            timm:$cc)>,
2545                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2546  }
2547
2548  // Patterns for selecting with loads in other operand.
2549  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2550                         timm:$cc),
2551            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2552                                                      (X86cmpm_imm_commute timm:$cc))>;
2553
2554  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2555                                            (_.VT _.RC:$src1),
2556                                            timm:$cc)),
2557            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2558                                                       _.RC:$src1, addr:$src2,
2559                                                       (X86cmpm_imm_commute timm:$cc))>;
2560
2561  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2562                         (_.VT _.RC:$src1), timm:$cc),
2563            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2564                                                       (X86cmpm_imm_commute timm:$cc))>;
2565
2566  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2567                                            (_.VT _.RC:$src1),
2568                                            timm:$cc)),
2569            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2570                                                        _.RC:$src1, addr:$src2,
2571                                                        (X86cmpm_imm_commute timm:$cc))>;
2572
2573  // Patterns for mask intrinsics.
  // An all-ones mask selects the unmasked instruction; any other mask selects
  // the corresponding "k" form with that mask as its first operand.
2574  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2575                      (_.KVT immAllOnesV)),
2576            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2577
2578  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2579            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2580                                                       _.RC:$src2, timm:$cc)>;
2581
2582  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2583                      (_.KVT immAllOnesV)),
2584            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2585
2586  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2587                      _.KRCWM:$mask),
2588            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2589                                                       addr:$src2, timm:$cc)>;
2590
2591  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2592                      (_.KVT immAllOnesV)),
2593            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2594
2595  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2596                      _.KRCWM:$mask),
2597            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2598                                                        addr:$src2, timm:$cc)>;
2599
2600  // Patterns for mask intrinsics with loads in other operand.
2601  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2602                      (_.KVT immAllOnesV)),
2603            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2604                                                      (X86cmpm_imm_commute timm:$cc))>;
2605
2606  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2607                      _.KRCWM:$mask),
2608            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2609                                                       _.RC:$src1, addr:$src2,
2610                                                       (X86cmpm_imm_commute timm:$cc))>;
2611
2612  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2613                      (_.KVT immAllOnesV)),
2614            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2615                                                       (X86cmpm_imm_commute timm:$cc))>;
2616
2617  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2618                      _.KRCWM:$mask),
2619            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2620                                                        _.RC:$src1, addr:$src2,
2621                                                        (X86cmpm_imm_commute  timm:$cc))>;
2622}
2623
// Defines the register/register "{sae}" form of the packed FP compare (rrib,
// opcode 0xC2, EVEX_B). The unmasked pattern matches X86cmpmmSAE with an
// all-ones mask; the masked pattern matches X86cmpmmSAE with $mask. Unlike
// the forms in avx512_vcmp_common, only Uses = [MXCSR] is set here;
// mayRaiseFPException is not.
2624multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2625  // comparison code form (VCMP[EQ/LT/LE/...])
2626  let Uses = [MXCSR] in
2627  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2628                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2629                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2630                     "vcmp"#_.Suffix,
2631                     "$cc, {sae}, $src2, $src1",
2632                     "$src1, $src2, {sae}, $cc",
2633                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2634                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2635                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2636                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2637                     EVEX_B, Sched<[sched]>;
2638}
2639
// Instantiates the packed FP compares for all three vector widths of `_`.
// Z (info512, gated on [Pred]) gets both avx512_vcmp_common and
// avx512_vcmp_sae; Z128 and Z256 (gated on [Pred, HasVLX]) get only
// avx512_vcmp_common, so the {sae} form exists for the 512-bit width only.
// Pred defaults to HasAVX512.
2640multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2641                       Predicate Pred = HasAVX512> {
2642  let Predicates = [Pred] in {
2643    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2644                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2645
2646  }
2647  let Predicates = [Pred,HasVLX] in {
2648   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2649   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2650  }
2651}
2652
// Packed FP compare instantiations for f64, f32 and f16 vectors, all using
// SchedWriteFCmp. VCMPPD and VCMPPS omit the predicate argument; VCMPPH passes
// HasFP16 explicitly and adds TA.
2653defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2654                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2655defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2656                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2657defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2658                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2659
2660// Patterns to select fp compares with load as first operand.
// The scalar load becomes the memory operand of VCMPSDZrm / VCMPSSZrm, the
// register becomes $src1, and the immediate is rewritten by
// X86cmpm_imm_commute to account for the swapped operand order.
2661let Predicates = [HasAVX512] in {
2662  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2663            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2664
2665  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2666            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2667}
2668
// f16 scalar compare with the load as first operand (requires HasFP16): the
// load becomes the memory operand of VCMPSHZrm and the immediate is rewritten
// by X86cmpm_imm_commute.
2669let Predicates = [HasFP16] in {
2670  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2671            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2672}
2673
2674// ----------------------------------------------------------------
2675// FPClass
2676
// Single-use variant of X86Vfpclasss: matches only when the node has exactly
// one use (N->hasOneUse()).
2677def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2678                              (X86Vfpclasss node:$src1, node:$src2), [{
2679  return N->hasOneUse();
2680}]>;
2681
// Single-use variant of X86Vfpclass: matches only when the node has exactly
// one use (N->hasOneUse()).
2682def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2683                             (X86Vfpclass node:$src1, node:$src2), [{
2684  return N->hasOneUse();
2685}]>;
2686
2687// Handle scalar fpclass instructions: mask = op(reg_scalar, imm)
2688//                                            op(mem_scalar, imm)
// Defines four forms, all gated on [prd] with ExeDomain = _.ExeDomain and
// Uses = [MXCSR]:
//   rr  - register source; pattern uses X86Vfpclasss
//   rrk - register source; pattern is (and $mask, X86Vfpclasss_su ...)
//   rm  - scalar memory source (_.ScalarIntMemFrags); pattern uses X86Vfpclasss
//   rmk - scalar memory source; pattern is (and $mask, X86Vfpclasss_su ...)
// $src2 is the i32u8imm class-selection immediate in every form.
2689multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2690                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2691                                 Predicate prd> {
2692  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2693      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2694                      (ins _.RC:$src1, i32u8imm:$src2),
2695                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2696                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2697                              (i32 timm:$src2)))]>,
2698                      Sched<[sched]>;
2699      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2700                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2701                      OpcodeStr#_.Suffix#
2702                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2703                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2704                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2705                                      (i32 timm:$src2))))]>,
2706                      EVEX_K, Sched<[sched]>;
2707    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2708                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2709                    OpcodeStr#_.Suffix#
2710                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2711                    [(set _.KRC:$dst,
2712                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2713                                        (i32 timm:$src2)))]>,
2714                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2715    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2716                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2717                    OpcodeStr#_.Suffix#
2718                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2719                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2720                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2721                            (i32 timm:$src2))))]>,
2722                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2723  }
2724}
2725
2726// Handle vector fpclass instructions: mask = fpclass(reg_vec, imm)
2727//                                            fpclass(mem_vec, imm)
2728//                                            fpclass(broadcast(eltVt), imm)
// Defines six forms with ExeDomain = _.ExeDomain and Uses = [MXCSR]:
//   rr / rrk   - register source (unmasked / ANDed with $mask)
//   rm / rmk   - full-vector load source; the mnemonic gets "{" # mem # "}"
//                appended
//   rmb / rmbk - broadcast load source (EVEX_B); the asm operand gets
//                _.BroadcastStr appended
// Unmasked patterns use X86Vfpclass; masked ones use (and $mask,
// X86Vfpclass_su ...). `mem` is the width letter supplied by the caller.
2729multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2730                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2731                                 string mem>{
2732  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2733  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2734                      (ins _.RC:$src1, i32u8imm:$src2),
2735                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2736                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2737                                       (i32 timm:$src2)))]>,
2738                      Sched<[sched]>;
2739  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2740                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2741                      OpcodeStr#_.Suffix#
2742                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2743                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2744                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2745                                       (i32 timm:$src2))))]>,
2746                      EVEX_K, Sched<[sched]>;
2747  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2748                    (ins _.MemOp:$src1, i32u8imm:$src2),
2749                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2750                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2751                    [(set _.KRC:$dst,(X86Vfpclass
2752                                     (_.VT (_.LdFrag addr:$src1)),
2753                                     (i32 timm:$src2)))]>,
2754                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2755  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2756                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2757                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2758                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2759                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2760                                  (_.VT (_.LdFrag addr:$src1)),
2761                                  (i32 timm:$src2))))]>,
2762                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2763  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2764                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2765                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2766                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2767                                                  #_.BroadcastStr#", $src2}",
2768                    [(set _.KRC:$dst,(X86Vfpclass
2769                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2770                                     (i32 timm:$src2)))]>,
2771                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2772  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2773                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2774                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2775                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2776                                                   _.BroadcastStr#", $src2}",
2777                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2778                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2779                                     (i32 timm:$src2))))]>,
2780                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2781  }
2782
2783  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2784  // the memory form.
  // These aliases map the mnemonic with the bare `mem` suffix onto the rr,
  // rrk, rmb and rmbk instructions; each one is tagged with the "att" variant.
2785  def : InstAlias<OpcodeStr#_.Suffix#mem#
2786                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2787                  (!cast<Instruction>(NAME#"rr")
2788                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2789  def : InstAlias<OpcodeStr#_.Suffix#mem#
2790                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2791                  (!cast<Instruction>(NAME#"rrk")
2792                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2793  def : InstAlias<OpcodeStr#_.Suffix#mem#
2794                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2795                  _.BroadcastStr#", $src2}",
2796                  (!cast<Instruction>(NAME#"rmb")
2797                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2798  def : InstAlias<OpcodeStr#_.Suffix#mem#
2799                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2800                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2801                  (!cast<Instruction>(NAME#"rmbk")
2802                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2803}
2804
// Instantiates avx512_vector_fpclass once per vector width of `_`, passing
// the width letter used in the memory-form mnemonic:
//   Z    - info512, sched.ZMM, "z", EVEX_V512, gated on [prd]
//   Z128 - info128, sched.XMM, "x", EVEX_V128, gated on [prd, HasVLX]
//   Z256 - info256, sched.YMM, "y", EVEX_V256, gated on [prd, HasVLX]
2805multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2806                                     bits<8> opc, X86SchedWriteWidths sched,
2807                                     Predicate prd>{
2808  let Predicates = [prd] in {
2809    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2810                                      _.info512, "z">, EVEX_V512;
2811  }
2812  let Predicates = [prd, HasVLX] in {
2813    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2814                                      _.info128, "x">, EVEX_V128;
2815    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2816                                      _.info256, "y">, EVEX_V256;
2817  }
2818}
2819
// Instantiates every fpclass variant for one mnemonic. opcVec is used for the
// packed forms and opcScalar for the scalar forms:
//   PH  / SHZ - f16 packed / scalar, gated on HasFP16 (AVX512PSIi8Base, TA)
//   PS  / SSZ - f32 packed / scalar, gated on HasDQI (AVX512AIi8Base)
//   PD  / SDZ - f64 packed / scalar, gated on HasDQI (AVX512AIi8Base, VEX_W)
// Scalar forms use sched.Scl. SSZ and SDZ carry VEX_LIG; SHZ does not.
2820multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2821                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2822  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2823                                      sched, HasFP16>,
2824                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2825  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2826                                   sched.Scl, f16x_info, HasFP16>,
2827                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2828  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2829                                      sched, HasDQI>,
2830                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2831  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2832                                      sched, HasDQI>,
2833                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
2834  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2835                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2836                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2837  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2838                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2839                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
2840}
2841
2842defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2843
2844//-----------------------------------------------------------------
2845// Mask register copy, including
2846// - copy between mask registers
2847// - load/store mask registers
2848// - copy from GPR to mask register and vice versa
2849//
// Mask-register move family for one mask width:
//   kk - mask register to mask register (opcode opc_kk, no selection pattern)
//   km - load a mask register from memory (opcode opc_km)
//   mk - store a mask register to memory (opcode opc_mk)
// `KRC` is the mask register class, `vvt` the value type produced by the load
// pattern, and `x86memop` the memory operand used by the km/mk forms.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  // Register-to-register copy.
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
             Sched<[WriteMove]>;
  }

  // Load from memory; selected for a plain load of type `vvt`.
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
           Sched<[WriteLoad]>;

  // Store to memory.
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(store KRC:$src, addr:$dst)]>,
           Sched<[WriteStore]>;
}
2866
// Moves between a mask register class (KRC) and a general-purpose register
// class (GRC):
//   kr - GPR to mask register (opcode opc_kr)
//   rk - mask register to GPR (opcode opc_rk)
// Neither form carries a selection pattern; both are marked side-effect free.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in
  def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
           Sched<[WriteMove]>;

  let hasSideEffects = 0 in
  def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
           Sched<[WriteMove]>;
}
2879
// KMOVB: 8-bit masks, guarded by HasDQI.  The GPR side uses GR32.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit masks, guarded by HasAVX512.  The GPR side uses GR32.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit masks, guarded by HasBWI.  The mask/memory
// forms and the GPR forms are instantiated separately because they are given
// different prefix/VEX_W encoding classes.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2900
// Bitcasts and extensions between general-purpose registers and mask
// registers.

// v16i1 <-> i16.  The GR16 value is placed in the low 16 bits of an otherwise
// undefined GR32 before being copied into VK16; the reverse direction copies
// to GR32 and extracts the 16-bit (or 8-bit, for the truncating case)
// sub-register.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
            VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG
            (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
            sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG
            (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
            sub_8bit)>;

// v8i1 <-> i8, handled the same way through the 8-bit sub-register.
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)),
            VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG
            (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
            sub_8bit)>;

// Extending a v16i1 mask to i32/i64.  Zero extension goes through KMOVWrk;
// any-extension only needs a register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF),
                         (COPY_TO_REGCLASS VK16:$src, GR32),
                         sub_32bit)>;

// Extending a v8i1 mask to i32/i64.  The zero-extending forms use KMOVBrk and
// are therefore restricted to HasDQI.
let Predicates = [HasDQI] in {
  def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
            (KMOVBrk VK8:$src)>;
  def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
            (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>;
}
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF),
                         (COPY_TO_REGCLASS VK8:$src, GR32),
                         sub_32bit)>;

// v32i1 <-> i32 and v64i1 <-> i64 are plain register-class copies.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2940
// Mask-register loads.

// With DQI, v1i1/v2i1/v4i1 loads are done with KMOVBkm and the result is
// copied into the matching narrow mask register class.
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;

  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;

  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// With plain AVX-512, an i8 load bitcast to v8i1 goes through a GPR
// (MOVZX32rm8), while an i16 load bitcast to v16i1 uses KMOVWkm directly.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;

  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}
2957
// ISD::EXTRACT_VECTOR_ELT constrained to: an i8 result, a vector operand whose
// element type is i1, and a pointer-typed index operand.
def X86kextract
    : SDNode<"ISD::EXTRACT_VECTOR_ELT",
             SDTypeProfile<1, 2,
                           [SDTCisVT<0, i8>,
                            SDTCVecEltisVT<1, i1>,
                            SDTCisPtrTy<2>]>>;
2962
let Predicates = [HasAVX512] in {
  // For one mask register class / value type pair, lowers scalar_to_vector
  // from GR32 or GR8 into the mask register, and extraction of element 0
  // (optionally any-extended to i32) back out of it, using register-class
  // copies and 8-bit sub-register insert/extract.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC,
                                              ValueType maskVT> {
    // GR32 -> mask.
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    // GR8 -> mask, via the low byte of an otherwise undefined GR32.
    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS
                (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                maskRC)>;

    // Element 0 of the mask as i8.
    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG
                (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)),
                sub_8bit)>;

    // Element 0 of the mask any-extended to i32.
    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Inserting a v1i1 built from GR8 into an all-zeros v16i1 at index 0: AND
  // the (widened) GPR with 1, then move the result into the mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)),
                              (iPTR 0)),
            (KMOVWkr
              (AND32ri8
                (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                (i32 1)))>;
}
2992
// Mask unary operation
// - KNOT
//
// Defines the register-register form `rr` of a one-operand mask instruction
// on register class KRC, selected for (OpNode KRC:$src) and guarded by `prd`.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))]>,
             Sched<[sched]>;
  }
}
3004
// Instantiates avx512_mask_unop for all four mask widths, appending the width
// letter to the mnemonic: B (VK8, HasDQI), W (VK16, HasAVX512), D (VK32,
// HasBWI) and Q (VK64, HasBWI).
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, OpcodeStr # "b", VK8, OpNode, sched, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr # "w", VK16, OpNode, sched,
                            HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr # "d", VK32, OpNode, sched, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr # "q", VK64, OpNode, sched, HasBWI>,
           VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot,
                                 SchedWriteVecLogic.XMM>;
3020
// Without DQI the byte-sized KNOTB is not available (the B form of KNOT is
// guarded by HasDQI), so an 8-bit mask is promoted to 16 bits: copy into VK16,
// invert with KNOTWrr, and copy the result back to VK8.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Masks narrower than 8 bits are always inverted through the 16-bit
// instruction.  Each result is copied back to the register class of the
// source mask.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// The result class here is VK1 (it was previously VK2), matching the VK1
// source operand and the VK1 case of avx512_binop_pat.
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
3032
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
//
// Defines the register-register form `rr` of a two-operand mask instruction
// on register class KRC, selected for (OpNode KRC:$src1, KRC:$src2).  `prd`
// guards the instruction and `IsCommutable` is forwarded to isCommutable.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
  }
}
3046
// Instantiates avx512_mask_binop for all four mask widths, appending the width
// letter to the mnemonic.  B is guarded by HasDQI, D and Q by HasBWI; the
// predicate for the W form is `prdW`, which defaults to HasAVX512.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, OpcodeStr # "b", VK8, OpNode, sched, HasDQI,
                             IsCommutable>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr # "w", VK16, OpNode, sched, prdW,
                             IsCommutable>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr # "d", VK32, OpNode, sched, HasBWI,
                             IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr # "q", VK64, OpNode, sched, HasBWI,
                             IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PS;
}
3060
// These nodes use 'vnot' instead of 'not' to support vectors.
// vandn(a, b) = (~a) & b; vxnor(a, b) = ~(a ^ b).
def vandn : PatFrag<(ops node:$i0, node:$i1),
                    (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1),
                    (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
// The final template argument before the optional predicate is IsCommutable;
// only KANDN is non-commutable.  KADD overrides the W-form predicate with
// HasDQI.
defm KAND  : avx512_mask_binop_all<0x41, "kand", and,
                                   SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor", or,
                                   SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,
                                   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor", xor,
                                   SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,
                                   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd", X86kadd,
                                   SchedWriteVecLogic.XMM, 1, HasDQI>;
3072
// Lowers a binary mask operation on masks narrower than 16 bits onto the
// 16-bit instruction `Inst`: both operands are copied into VK16, the
// operation is performed there, and the result is copied back to the narrow
// register class.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in {
    def : Pat<(VOpNode VK8:$src1, VK8:$src2),
              (COPY_TO_REGCLASS
                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                      (COPY_TO_REGCLASS VK8:$src2, VK16)),
                VK8)>;
  }

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)),
              VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK2:$src1, VK16),
                    (COPY_TO_REGCLASS VK2:$src2, VK16)),
              VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK4:$src1, VK16),
                    (COPY_TO_REGCLASS VK4:$src2, VK16)),
              VK4)>;
}

defm : avx512_binop_pat<and, KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or, KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor, KXORWrr>;
3103
// Mask unpacking
//
// Defines KUNPCK<Suffix>rr (opcode 0x4b), which takes two Src-width masks and
// produces one Dst-width mask, together with the pattern that selects it for
// concat_vectors.  Note that the pattern hands the concat operands to the
// instruction in swapped order ($src2 first, then $src1).
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in {
      def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
                 (ins Src.KRC:$src1, Src.KRC:$src2),
                 !strconcat("kunpck", Suffix,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>,
               VEX_4V, VEX_L, Sched<[sched]>;
    }

    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle,
                                  HasAVX512>,
                PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle,
                                  HasBWI>,
                PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle,
                                  HasBWI>,
                PS, VEX_W;
3123
// Mask bit testing
//
// Defines the register-register form `rr` of a mask test instruction.  It has
// no register outputs; its only result is EFLAGS, set from
// (OpNode KRC:$src1, KRC:$src2).
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
  }
}
3134
// Instantiates avx512_mask_testop for all four mask widths.  B is guarded by
// HasDQI, Q and D by HasBWI; the predicate for the W form is `prdW`, which
// defaults to HasAVX512.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                              sched, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                              sched, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                              sched, HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                              sched, HasBWI>,
           VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KTEST overrides the W-form predicate with HasDQI.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest,
                                    SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest,
                                    SchedWriteVecLogic.XMM, HasDQI>;
3151
// Mask shift
//
// Defines the register/immediate form `ri` of a mask shift on register class
// KRC, selected for (OpNode KRC:$src, (i8 timm:$imm)).  The definition itself
// is guarded by HasAVX512.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 OpcodeStr # "\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
             Sched<[sched]>;
  }
}
3162
// Instantiates avx512_mask_shiftop for all four mask widths.  The W and B
// forms share opcode `opc1`; the Q and D forms share opcode `opc2`.  B is
// wrapped in a HasDQI predicate and Q/D in a HasBWI predicate; W carries no
// predicate beyond the one set inside avx512_mask_shiftop.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr # "w", VK16, OpNode, sched>,
           VEX, TAPD, VEX_W;

  let Predicates = [HasDQI] in {
    defm B : avx512_mask_shiftop<opc1, OpcodeStr # "b", VK8, OpNode, sched>,
             VEX, TAPD;
  }

  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, OpcodeStr # "q", VK64, OpNode, sched>,
             VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, OpcodeStr # "d", VK32, OpNode, sched>,
             VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl,
                                     WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr,
                                     WriteShuffle>;
3180
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
//
// Each Narrow operand is inserted into an otherwise undefined Wide register,
// the instruction named InstStr # "Zrri" (or "Zrrik" for the masked variant)
// is applied, and the resulting mask is copied to the Narrow mask register
// class.  `Frag` matches the plain compare; `Frag_su` is used for the compare
// that is ANDed with a mask.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                  (Narrow.VT Narrow.RC:$src2), cond)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrri")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                (X86pcmpm_imm $cc)),
              Narrow.KRC)>;

  // Compare ANDed with $mask: folded into the masked instruction form.
  def : Pat<(Narrow.KVT
              (and Narrow.KRC:$mask,
                   (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                            (Narrow.VT Narrow.RC:$src2),
                                            cond)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrrik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                (X86pcmpm_imm $cc)),
              Narrow.KRC)>;
}
3204
// Lowers Narrow integer vector compares whose other operand is a broadcast
// load onto the Wide broadcast-memory instruction forms InstStr # "Zrmib"
// (unmasked) and InstStr # "Zrmibk" (masked).  The register operand is
// inserted into an otherwise undefined Wide register and the mask result is
// copied to the Narrow mask register class.  When the broadcast load is the
// first compare operand, the condition code is rewritten with
// X86pcmpm_imm_commute instead of X86pcmpm_imm.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag,
                                                     PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
  // Broadcast load.
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                  (Narrow.BroadcastLdFrag addr:$src2), cond)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmib")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, (X86pcmpm_imm $cc)),
              Narrow.KRC)>;

  def : Pat<(Narrow.KVT
              (and Narrow.KRC:$mask,
                   (Narrow.KVT
                     (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                  (Narrow.BroadcastLdFrag addr:$src2),
                                  cond)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmibk")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, (X86pcmpm_imm $cc)),
              Narrow.KRC)>;

  // Commuted with broadcast load.
  def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                  (Narrow.VT Narrow.RC:$src1),
                                  cond)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmib")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, (X86pcmpm_imm_commute $cc)),
              Narrow.KRC)>;

  def : Pat<(Narrow.KVT
              (and Narrow.KRC:$mask,
                   (Narrow.KVT
                     (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                  (Narrow.VT Narrow.RC:$src1),
                                  cond)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmibk")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, (X86pcmpm_imm_commute $cc)),
              Narrow.KRC)>;
}
3246
// Lowers Narrow floating-point vector compares onto the Wide instruction
// forms of InstStr.  Unlike the integer lowerings this does not take PatFrag
// parameters: it matches X86cmpm / X86cmpm_su directly with a timm condition
// code.  Register operands are inserted into otherwise undefined Wide
// registers and the mask result is copied to the Narrow mask register class.
// Forms used: "Zrri"/"Zrrik" (register) and "Zrmbi"/"Zrmbik" (broadcast
// load); when the broadcast load is the first compare operand the condition
// code goes through X86cmpm_imm_commute.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
  // Register-register.
  def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                                 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrri")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                timm:$cc),
              Narrow.KRC)>;

  def : Pat<(Narrow.KVT
              (and Narrow.KRC:$mask,
                   (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrrik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                timm:$cc),
              Narrow.KRC)>;

  // Broadcast load.
  def : Pat<(Narrow.KVT
              (X86cmpm (Narrow.VT Narrow.RC:$src1),
                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                       timm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmbi")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, timm:$cc),
              Narrow.KRC)>;

  def : Pat<(Narrow.KVT
              (and Narrow.KRC:$mask,
                   (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               timm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmbik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, timm:$cc),
              Narrow.KRC)>;

  // Commuted with broadcast load.
  def : Pat<(Narrow.KVT
              (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                       (Narrow.VT Narrow.RC:$src1), timm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmbi")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, (X86cmpm_imm_commute timm:$cc)),
              Narrow.KRC)>;

  def : Pat<(Narrow.KVT
              (and Narrow.KRC:$mask,
                   (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrmbik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                addr:$src2, (X86cmpm_imm_commute timm:$cc)),
              Narrow.KRC)>;
}
3300
// AVX-512 without VLX: route 128/256-bit dword/qword integer compares and
// f32/f64 compares through the 512-bit instructions.
let Predicates = [HasAVX512, NoVLX] in {
  // Signed / unsigned integer compares, register-register forms.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPD",
                                               v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUD",
                                               v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPD",
                                               v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUD",
                                               v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPQ",
                                               v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUQ",
                                               v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPQ",
                                               v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUQ",
                                               v2i64x_info, v8i64_info>;

  // Signed / unsigned integer compares, broadcast-load forms.
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                                   "VPCMPD",
                                                   v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                                   "VPCMPUD",
                                                   v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                                   "VPCMPD",
                                                   v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                                   "VPCMPUD",
                                                   v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                                   "VPCMPQ",
                                                   v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                                   "VPCMPUQ",
                                                   v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                                   "VPCMPQ",
                                                   v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                                   "VPCMPUQ",
                                                   v2i64x_info, v8i64_info>;

  // Floating-point compares.
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS",
                                              v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS",
                                              v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD",
                                              v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD",
                                              v2f64x_info, v8f64_info>;
}
3331
// BWI without VLX: route 128/256-bit byte/word integer compares through the
// 512-bit instructions.
let Predicates = [HasBWI, NoVLX] in {
  // Byte elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPB",
                                               v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUB",
                                               v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPB",
                                               v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUB",
                                               v16i8x_info, v64i8_info>;

  // Word elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPW",
                                               v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUW",
                                               v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su,
                                               "VPCMPW",
                                               v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su,
                                               "VPCMPUW",
                                               v8i16x_info, v32i16_info>;
}
3345
// Mask setting all 0s or 1s
//
// Defines a pseudo instruction (empty asm string, no inputs) that produces
// the constant `Val` of type `VT` in mask register class KRC.  It is marked
// rematerializable and as cheap as a move, and is guarded by HasAVX512.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT,
                             SDPatternOperator Val> {
  let Predicates = [HasAVX512], isReMaterializable = 1, isAsCheapAsAMove = 1,
      isPseudo = 1, SchedRW = [WriteZero] in {
    def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                  [(set KRC:$dst, (VT Val))]>;
  }
}
3354
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types.
multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

// KSET0* materialize an all-zeros mask, KSET1* an all-ones mask.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3363
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants of 8 or fewer mask bits are produced with
// the 16-bit KSET0W / KSET1W pseudos and copied into the narrow mask class.
let Predicates = [HasAVX512] in {
  // All zeros.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;

  // All ones.
  def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3375
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
//
// For a (sub mask, full mask) pair, extracting the sub-vector at index 0 and
// inserting a sub-vector at index 0 of an undef vector are both lowered to a
// register-class copy.
multiclass operation_subvector_mask_lowering<RegisterClass subRC,
                                             ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  // Full mask -> low sub-mask.
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  // Sub-mask -> low part of an otherwise undefined full mask.
  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}

// v1i1 sub-mask.
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

// v2i1 sub-mask.
defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

// v4i1 sub-mask.
defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;

// v8i1 sub-mask.
defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;

// v16i1 sub-mask.
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

// v32i1 sub-mask.
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3411
3412//===----------------------------------------------------------------------===//
3413// AVX-512 - Aligned and unaligned load and store
3414//
3415
// Defines the load-direction forms of an AVX-512 vector move for one vector
// type, plus the patterns that select masked loads onto them.
//   opc          - opcode byte shared by all forms.
//   OpcodeStr    - mnemonic used in the assembly strings.
//   Name         - instruction name prefix used to look up rmk / rmkz.
//   _            - vector type information.
//   ld_frag      - load fragment used by the rm / rmk / rmkz patterns.
//   mload        - masked load fragment used by the trailing patterns.
//   Sched        - scheduling classes (RR for register, RM for memory forms).
//   EVEX2VEXOvrd - name prefix passed to EVEX2VEXOverride for rr / rm.
//   NoRMPattern  - when set, rm is defined without a selection pattern.
//   SelectOprr   - select operator used by the rrk / rrkz patterns.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  // Operand strings appended to the mnemonic: plain, merge-masked and
  // zero-masked.
  defvar AsmOps   = "\t{$src, $dst|$dst, $src}";
  defvar AsmOpsK  = "\t{$src1, ${dst} {${mask}}|${dst} {${mask}}, $src1}";
  defvar AsmOpsKZ = "\t{$src, ${dst} {${mask}} {z}|" #
                    "${dst} {${mask}} {z}, $src}";

  let hasSideEffects = 0 in {
    // Register to register, unmasked. No selection pattern.
    let isMoveReg = 1 in
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      OpcodeStr # AsmOps, [], _.ExeDomain>,
             EVEX, Sched<[Sched.RR]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;

    // Register to register, zero-masked.
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        OpcodeStr # AsmOpsKZ,
                        [(set _.RC:$dst,
                              (_.VT (SelectOprr _.KRCWM:$mask,
                                                (_.VT _.RC:$src),
                                                _.ImmAllZerosV)))],
                        _.ExeDomain>,
               EVEX, EVEX_KZ, Sched<[Sched.RR]>;

    // Memory to register, unmasked. The pattern is dropped when NoRMPattern
    // is set.
    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                      OpcodeStr # AsmOps,
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst, (_.VT (ld_frag addr:$src)))]),
                      _.ExeDomain>,
             EVEX, Sched<[Sched.RM]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

    // Merge-masked forms: $src0 is tied to $dst and supplies the pass-through
    // value of the select.
    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         OpcodeStr # AsmOpsK,
                         [(set _.RC:$dst,
                               (_.VT (SelectOprr _.KRCWM:$mask,
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src0))))],
                         _.ExeDomain>,
                EVEX, EVEX_K, Sched<[Sched.RR]>;

      def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         OpcodeStr # AsmOpsK,
                         [(set _.RC:$dst,
                               (_.VT (vselect_mask _.KRCWM:$mask,
                                                   (_.VT (ld_frag addr:$src1)),
                                                   (_.VT _.RC:$src0))))],
                         _.ExeDomain>,
                EVEX, EVEX_K, Sched<[Sched.RM]>;
    }

    // Memory to register, zero-masked.
    def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.MemOp:$src),
                        OpcodeStr # AsmOpsKZ,
                        [(set _.RC:$dst,
                              (_.VT (vselect_mask _.KRCWM:$mask,
                                                  (_.VT (ld_frag addr:$src)),
                                                  _.ImmAllZerosV)))],
                        _.ExeDomain>,
               EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }

  // Masked loads whose pass-through is undef or all zeros use rmkz.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked loads with a register pass-through use rmk.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
                                                     _.KRCWM:$mask, addr:$ptr)>;
}
3482
// Instantiates avx512_load for the 512-, 256- and 128-bit members of an
// AVX512VLVectorVTInfo using the aligned load fragments (AlignedLdFrag and
// masked_load_aligned). The Z form requires 'prd'; Z256 and Z128 additionally
// require HasVLX. The Z form is given an empty EVEX2VEXOvrd string, Z256 gets
// EVEX2VEXOvrd with "Y" appended and Z128 gets EVEX2VEXOvrd unchanged.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                         _.info512.AlignedLdFrag, masked_load_aligned,
                         Sched.ZMM, "", NoRMPattern>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned,
                            Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned,
                            Sched.XMM, EVEX2VEXOvrd, NoRMPattern>,
                EVEX_V128;
  }
}
3501
// Instantiates avx512_load for the 512-, 256- and 128-bit members of an
// AVX512VLVectorVTInfo using the plain load fragments (LdFrag and
// masked_load). The Z form requires 'prd'; Z256 and Z128 additionally require
// HasVLX. SelectOprr is forwarded unchanged to every instantiation.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                         _.info512.LdFrag, masked_load,
                         Sched.ZMM, "", NoRMPattern, SelectOprr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                            _.info256.LdFrag, masked_load,
                            Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern,
                            SelectOprr>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                            _.info128.LdFrag, masked_load,
                            Sched.XMM, EVEX2VEXOvrd, NoRMPattern,
                            SelectOprr>,
                EVEX_V128;
  }
}
3521
// Defines the store-direction forms of an AVX-512 vector move for one vector
// type: the MRMDestReg register forms (rr_REV / rrk_REV / rrkz_REV), the
// memory forms (mr / mrk), the masked-store pattern and the ".s" assembler
// aliases that name the _REV forms.
//   opc          - opcode byte shared by all forms.
//   OpcodeStr    - mnemonic used in the assembly strings.
//   BaseName     - instruction name prefix used for record lookups.
//   _            - vector type information.
//   st_frag      - store fragment used by the mr pattern.
//   mstore       - masked store fragment selected onto mrk.
//   Sched        - scheduling classes (RR for register, MR for memory forms).
//   EVEX2VEXOvrd - name prefix passed to EVEX2VEXOverride for rr_REV / mr.
//   NoMRPattern  - when set, mr is defined without a selection pattern.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  // Operand strings appended to the mnemonic: plain, merge-masked and
  // zero-masked.
  defvar AsmOps   = "\t{$src, $dst|$dst, $src}";
  defvar AsmOpsK  = "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}";
  defvar AsmOpsKZ = "\t{$src, ${dst} {${mask}} {z}|" #
                    "${dst} {${mask}} {z}, $src}";

  // Register forms using the store opcode. They carry no patterns and are
  // marked isCodeGenOnly with ForceDisassemble.
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
    let isMoveReg = 1 in
    def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                          OpcodeStr # AsmOps, [], _.ExeDomain>,
                 EVEX, FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;

    def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                           (ins _.KRCWM:$mask, _.RC:$src),
                           OpcodeStr # AsmOpsK, [], _.ExeDomain>,
                  EVEX, EVEX_K, FoldGenData<BaseName#_.ZSuffix#rrk>,
                  Sched<[Sched.RR]>;

    def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                            (ins _.KRCWM:$mask, _.RC:$src),
                            OpcodeStr # AsmOpsKZ, [], _.ExeDomain>,
                   EVEX, EVEX_KZ, FoldGenData<BaseName#_.ZSuffix#rrkz>,
                   Sched<[Sched.RR]>;
  }

  // Unmasked store. The pattern is dropped when NoMRPattern is set.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # AsmOps,
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>,
           EVEX, Sched<[Sched.MR]>, EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;

  // Masked store. It has no inline pattern; the Pat below selects mstore onto
  // it.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # AsmOpsK, [], _.ExeDomain>,
            EVEX, EVEX_K, Sched<[Sched.MR]>, NotMemoryFoldable;

  def : Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                         _.KRCWM:$mask,
                                                         _.RC:$src)>;

  // "<mnemonic>.s" aliases for the _REV register forms.
  def : InstAlias<OpcodeStr # ".s" # AsmOps,
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                       _.RC:$dst, _.RC:$src),
                  0>;
  def : InstAlias<OpcodeStr # ".s" # AsmOpsK,
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                       _.RC:$dst, _.KRCWM:$mask, _.RC:$src),
                  0>;
  def : InstAlias<OpcodeStr # ".s" # AsmOpsKZ,
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                       _.RC:$dst, _.KRCWM:$mask, _.RC:$src),
                  0>;
}
3576
// Instantiates avx512_store for the 512-, 256- and 128-bit members of an
// AVX512VLVectorVTInfo using the 'store' and 'masked_store' fragments. The Z
// form requires 'prd'; Z256 and Z128 additionally require HasVLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           X86SchedWriteMoveLSWidths Sched,
                           string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512,
                          store, masked_store,
                          Sched.ZMM, "", NoMRPattern>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256,
                             store, masked_store,
                             Sched.YMM, EVEX2VEXOvrd#"Y", NoMRPattern>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128,
                             store, masked_store,
                             Sched.XMM, EVEX2VEXOvrd, NoMRPattern>,
                EVEX_V128;
  }
}
3594
// Instantiates avx512_store for the 512-, 256- and 128-bit members of an
// AVX512VLVectorVTInfo using the 'alignedstore' and 'masked_store_aligned'
// fragments. The Z form requires 'prd'; Z256 and Z128 additionally require
// HasVLX.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512,
                          alignedstore, masked_store_aligned,
                          Sched.ZMM, "", NoMRPattern>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256,
                             alignedstore, masked_store_aligned,
                             Sched.YMM, EVEX2VEXOvrd#"Y", NoMRPattern>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128,
                             alignedstore, masked_store_aligned,
                             Sched.XMM, EVEX2VEXOvrd, NoMRPattern>,
                EVEX_V128;
  }
}
3613
// Aligned floating-point moves.
defm VMOVAPS :
    avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512,
                          SchedWriteFMoveLS, "VMOVAPS">,
    avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, HasAVX512,
                           SchedWriteFMoveLS, "VMOVAPS">,
    PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD :
    avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512,
                          SchedWriteFMoveLS, "VMOVAPD">,
    avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, HasAVX512,
                           SchedWriteFMoveLS, "VMOVAPD">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned floating-point moves. null_frag is passed as SelectOprr, the
// operator used by the rrk / rrkz patterns of avx512_load.
defm VMOVUPS :
    avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                   SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
    avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                    SchedWriteFMoveLS, "VMOVUPS">,
    PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD :
    avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                   SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
    avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                    SchedWriteFMoveLS, "VMOVUPD">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves. VMOVDQA32 sets NoRMPattern / NoMRPattern, so its
// unmasked rm / mr forms carry no selection pattern.
defm VMOVDQA32 :
    avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512,
                          SchedWriteVecMoveLS, "VMOVDQA", 1>,
    avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, HasAVX512,
                           SchedWriteVecMoveLS, "VMOVDQA", 1>,
    PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 :
    avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512,
                          SchedWriteVecMoveLS, "VMOVDQA">,
    avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, HasAVX512,
                           SchedWriteVecMoveLS, "VMOVDQA">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves. The 8- and 16-bit element forms require HasBWI.
// Every form except VMOVDQU64 sets NoRMPattern / NoMRPattern.
defm VMOVDQU8 :
    avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                   SchedWriteVecMoveLS, "VMOVDQU", 1>,
    avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                    SchedWriteVecMoveLS, "VMOVDQU", 1>,
    XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 :
    avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                   SchedWriteVecMoveLS, "VMOVDQU", 1>,
    avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                    SchedWriteVecMoveLS, "VMOVDQU", 1>,
    XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 :
    avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                   SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
    avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                    SchedWriteVecMoveLS, "VMOVDQU", 1>,
    XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 :
    avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                   SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
    avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                    SchedWriteVecMoveLS, "VMOVDQU">,
    XS, VEX_W, EVEX_CD8<64, CD8VF>;
3677
// Pseudo instructions that help with spilling 128-bit and 256-bit registers
// when VLX is not available: the load or store has to go through a ZMM
// register instead. They are converted in expandPostRAPseudos.

// Loads.
let isPseudo = 1, mayLoad = 1, hasSideEffects = 0,
    isReMaterializable = 1, canFoldAsLoad = 1 in {
  def VMOVAPSZ128rm_NOVLX
      : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), "", []>,
        Sched<[WriteFLoadX]>;
  def VMOVAPSZ256rm_NOVLX
      : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), "", []>,
        Sched<[WriteFLoadY]>;
  def VMOVUPSZ128rm_NOVLX
      : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), "", []>,
        Sched<[WriteFLoadX]>;
  def VMOVUPSZ256rm_NOVLX
      : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), "", []>,
        Sched<[WriteFLoadY]>;
}

// Stores.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
  def VMOVAPSZ128mr_NOVLX
      : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), "", []>,
        Sched<[WriteFStoreX]>;
  def VMOVAPSZ256mr_NOVLX
      : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), "", []>,
        Sched<[WriteFStoreY]>;
  def VMOVUPSZ128mr_NOVLX
      : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), "", []>,
        Sched<[WriteFStoreX]>;
  def VMOVUPSZ256mr_NOVLX
      : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), "", []>,
        Sched<[WriteFStoreY]>;
}
3703
// A select whose true operand is all zeros is selected as a zero-masked move
// of the false operand under the inverted mask. The v8i64 form copies the
// mask to VK16 for KNOTWrr and copies the result back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask,
                          (v8i64 immAllZerosV), (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz
              (COPY_TO_REGCLASS
                  (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8),
              VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask,
                           (v16i32 immAllZerosV), (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the patterns above from introducing a
// second mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV), (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;

def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV), (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3723
// Selects a masked vselect on a Narrow vector type by using the masked move
// named by InstrStr on the Wide vector type: each Narrow register operand is
// inserted into an IMPLICIT_DEF Wide register at Narrow.SubRegIdx, the mask is
// copied to Wide.KRCWM, and the Narrow result is extracted from the Wide
// result at Narrow.SubRegIdx.
//   InstrStr - name prefix of the Wide instruction; "rrk" / "rrkz" is appended.
//   Narrow   - vector type being selected.
//   Wide     - vector type of the instruction that is actually emitted.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // Select between two registers: merge-masked move.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
                (Wide.VT
                    (!cast<Instruction>(InstrStr # "rrk")
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src0,
                                                Narrow.SubRegIdx)),
                        (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src1,
                                                Narrow.SubRegIdx)))),
                Narrow.SubRegIdx)>;

  // Select between a register and all zeros: zero-masked move.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
                (Wide.VT
                    (!cast<Instruction>(InstrStr # "rrkz")
                        (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src1,
                                                Narrow.SubRegIdx)))),
                Narrow.SubRegIdx)>;
}
3745
// Patterns for handling masked selects of 128-bit and 256-bit vectors when
// VLX isn't available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit elements, widened to the 16-element 512-bit types.
  defm : mask_move_lowering<"VMOVAPSZ",   v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  // 64-bit elements, widened to the 8-element 512-bit types.
  defm : mask_move_lowering<"VMOVAPDZ",   v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ",   v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
3759
let Predicates = [HasBWI, NoVLX] in {
  // 8-bit elements, widened to v64i8.
  defm : mask_move_lowering<"VMOVDQU8Z",  v16i8x_info,  v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z",  v32i8x_info,  v64i8_info>;

  // 16-bit elements, widened to v32i16.
  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info,  v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3767
// Unmasked 512-bit loads and stores of v16i32, v32i16 and v64i8 are selected
// onto the VMOVDQA64 / VMOVDQU64 instructions.
let Predicates = [HasAVX512] in {
  // 512-bit load.
  foreach Ty = ["v16i32", "v32i16", "v64i8"] in
    def : Pat<(!cast<PatFrag>("alignedload" # Ty) addr:$src),
              (VMOVDQA64Zrm addr:$src)>;
  foreach Ty = ["v16i32", "v32i16", "v64i8"] in
    def : Pat<(!cast<PatFrag>("load" # Ty) addr:$src),
              (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  foreach Ty = ["v16i32", "v32i16", "v64i8"] in
    def : Pat<(alignedstore (!cast<ValueType>(Ty) VR512:$src), addr:$dst),
              (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  foreach Ty = ["v16i32", "v32i16", "v64i8"] in
    def : Pat<(store (!cast<ValueType>(Ty) VR512:$src), addr:$dst),
              (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3797
// Unmasked 128-bit and 256-bit loads and stores of the 32-, 16- and 8-bit
// element integer types are selected onto the VMOVDQA64 / VMOVDQU64
// instructions of the matching width.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src), (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src), (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src), (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src), (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src), (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src), (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
        (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
        (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
        (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
        (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
        (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
        (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src), (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src), (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src), (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src), (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src), (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src), (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
        (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
        (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
        (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
        (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
        (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
        (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
// v32f16 moves. Unmasked loads and stores use VMOVAPSZ / VMOVUPSZ; every
// masked form uses VMOVDQU16Z.
let Predicates = [HasFP16] in {
  // Register selects.
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                             (v32f16 VR512:$src1),
                             (v32f16 VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                             (v32f16 VR512:$src1),
                             v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;

  // Aligned loads, plain and folded into a select.
  def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                             (v32f16 (alignedloadv32f16 addr:$src)),
                             (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                             (v32f16 (alignedloadv32f16 addr:$src)),
                             v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  // Plain loads, alone and folded into a select.
  def : Pat<(v32f16 (loadv32f16 addr:$src)),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                             (v32f16 (loadv32f16 addr:$src)),
                             (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                             (v32f16 (loadv32f16 addr:$src)),
                             v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  // Masked loads.
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask,
                                 (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask,
                                 v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  // Stores.
  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
// v16f16 and v8f16 moves. Unmasked loads and stores use the VMOVAPS / VMOVUPS
// instructions of the matching width; every masked form uses VMOVDQU16.
let Predicates = [HasFP16, HasVLX] in {
  //
  // v16f16 (256-bit).
  //

  // Register selects.
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                             (v16f16 VR256X:$src1),
                             (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                             (v16f16 VR256X:$src1),
                             v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;

  // Aligned loads, plain and folded into a select.
  def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                             (v16f16 (alignedloadv16f16 addr:$src)),
                             (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                             (v16f16 (alignedloadv16f16 addr:$src)),
                             v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  // Plain loads, alone and folded into a select.
  def : Pat<(v16f16 (loadv16f16 addr:$src)),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                             (v16f16 (loadv16f16 addr:$src)),
                             (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                             (v16f16 (loadv16f16 addr:$src)),
                             v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  // Masked loads.
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask,
                                 (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask,
                                 v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  // Stores.
  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  //
  // v8f16 (128-bit).
  //

  // Register selects.
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                            (v8f16 VR128X:$src1),
                            (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                            (v8f16 VR128X:$src1),
                            v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;

  // Aligned loads, plain and folded into a select.
  def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                            (v8f16 (alignedloadv8f16 addr:$src)),
                            (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                            (v8f16 (alignedloadv8f16 addr:$src)),
                            v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  // Plain loads, alone and folded into a select.
  def : Pat<(v8f16 (loadv8f16 addr:$src)),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                            (v8f16 (loadv8f16 addr:$src)),
                            (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                            (v8f16 (loadv8f16 addr:$src)),
                            v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  // Masked loads.
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask,
                                (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask,
                                v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  // Stores.
  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
3959
3960// Move Int Doubleword to Packed Double Int
3961//
3962let ExeDomain = SSEPackedInt in {
// vmovd from a GR32 register: selects (v4i32 (scalar_to_vector GR32)).
def VMOVDI2PDIZrr
    : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))]>,
      EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd from memory: selects (v4i32 (scalar_to_vector (loadi32 addr))).
def VMOVDI2PDIZrm
    : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set VR128X:$dst,
                     (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq from a GR64 register: selects (v2i64 (scalar_to_vector GR64)).
def VMOV64toPQIZrr
    : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
               "vmovq\t{$src, $dst|$dst, $src}",
               [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))]>,
      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// vmovq from memory with the 0x6E opcode. It has no selection pattern and is
// marked isCodeGenOnly with ForceDisassemble.
let isCodeGenOnly = 1, ForceDisassemble = 1,
    hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm
    : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src),
               "vmovq\t{$src, $dst|$dst, $src}", []>,
      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3983let isCodeGenOnly = 1 in {
3984def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3985                       "vmovq\t{$src, $dst|$dst, $src}",
3986                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3987                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3988def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3989                         "vmovq\t{$src, $dst|$dst, $src}",
3990                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3991                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3992}
3993} // ExeDomain = SSEPackedInt
3994
3995// Move Int Doubleword to Single Scalar
3996//
// GR32 -> FR32X move (vmovd, opcode 0x6E). It is selected for a plain
// (bitconvert GR32) and is isCodeGenOnly.
3997let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3998def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3999                      "vmovd\t{$src, $dst|$dst, $src}",
4000                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
4001                      EVEX, Sched<[WriteVecMoveFromGpr]>;
4002} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4003
4004// Move doubleword from xmm register to r/m32
4005//
// Both forms use opcode 0x7E and read element 0 of the source viewed as
// v4i32.
4006let ExeDomain = SSEPackedInt in {
// XMM -> GR32: selected for (extractelt (v4i32 src), 0).
4007def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4008                       "vmovd\t{$src, $dst|$dst, $src}",
4009                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4010                                        (iPTR 0)))]>,
4011                       EVEX, Sched<[WriteVecMoveToGpr]>;
// XMM -> i32 memory: selected for a store of that same extracted element.
4012def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
4013                       (ins i32mem:$dst, VR128X:$src),
4014                       "vmovd\t{$src, $dst|$dst, $src}",
4015                       [(store (i32 (extractelt (v4i32 VR128X:$src),
4016                                     (iPTR 0))), addr:$dst)]>,
4017                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4018} // ExeDomain = SSEPackedInt
4019
4020// Move quadword from xmm1 register to r/m64
4021//
// Four vmovq forms that take the low quadword out of an XMM register. Two
// use opcode 0x7E and two use opcode 0xD6.
4022let ExeDomain = SSEPackedInt in {
// XMM -> GR64 (0x7E). Selected for (extractelt (v2i64 src), 0). Built on the
// plain `I` class, so the PD prefix and the HasAVX512 requirement are listed
// explicitly.
4023def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4024                      "vmovq\t{$src, $dst|$dst, $src}",
4025                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4026                                                   (iPTR 0)))]>,
4027                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
4028                      Requires<[HasAVX512]>;
4029
// XMM -> i64 memory through opcode 0x7E. No selection pattern (mayStore is
// set by hand); isCodeGenOnly with ForceDisassemble, and additionally
// restricted to In64BitMode.
4030let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4031def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4032                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4033                      EVEX, VEX_W, Sched<[WriteVecStore]>,
4034                      Requires<[HasAVX512, In64BitMode]>;
4035
// XMM -> i64 memory through opcode 0xD6. This is the form with a pattern:
// a store of element 0 of the source viewed as v2i64.
4036def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4037                      (ins i64mem:$dst, VR128X:$src),
4038                      "vmovq\t{$src, $dst|$dst, $src}",
4039                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4040                              addr:$dst)]>,
4041                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
4042                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4043
// XMM -> XMM through opcode 0xD6 (MRMDestReg). No pattern; isCodeGenOnly
// with ForceDisassemble. The "vmovq.s" InstAlias that follows this group
// maps onto this def.
4044let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4045def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4046                             (ins VR128X:$src),
4047                             "vmovq\t{$src, $dst|$dst, $src}", []>,
4048                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
4049} // ExeDomain = SSEPackedInt
4050
// Extra "vmovq.s" spelling that assembles to the 0xD6 register-to-register
// form VMOVPQI2QIZrr.
// NOTE(review): the trailing 0 is InstAlias's emit argument; presumably it
// keeps the alias from being chosen when printing -- confirm in Target.td.
4051def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4052                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4053
// With AVX512 available, an X86vextractstore64 of a v2i64 value is selected
// to the 0xD6 store form VMOVPQI2QIZmr.
4054let Predicates = [HasAVX512] in {
4055  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4056            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
4057}
4058
4059// Move Scalar Single to Double Int
4060//
// FR32X -> GR32 move (vmovd, opcode 0x7E, MRMDestReg). It is selected for a
// plain (bitconvert FR32X) and is isCodeGenOnly.
4061let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4062def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4063                      (ins FR32X:$src),
4064                      "vmovd\t{$src, $dst|$dst, $src}",
4065                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4066                      EVEX, Sched<[WriteVecMoveToGpr]>;
4067} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4068
4069// Move Quadword Int to Packed Quadword Int
4070//
// i64 memory -> XMM using the XS-prefixed 0x7E opcode (AVX512XSI). Selected
// for (v2i64 (scalar_to_vector (loadi64 addr))).
// NOTE(review): the compressed-displacement tuple here is <8, CD8VT8>, while
// the other 64-bit memory forms nearby use <64, CD8VT1>. Presumably both
// describe an 8-byte access -- confirm against the EVEX_CD8 definition.
4071let ExeDomain = SSEPackedInt in {
4072def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4073                      (ins i64mem:$src),
4074                      "vmovq\t{$src, $dst|$dst, $src}",
4075                      [(set VR128X:$dst,
4076                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4077                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4078} // ExeDomain = SSEPackedInt
4079
4080// Allow "vmovd" but print "vmovq".
// The two aliases cover the GR64 -> XMM form (VMOV64toPQIZrr) and the
// XMM -> GR64 form (VMOVPQIto64Zrr), whose own asm strings say "vmovq".
4081def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4082                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4083def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4084                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4085
4086// Conversions between masks and scalar fp.
// Each bitcast is expanded into a two-instruction chain: a KMOV paired with
// one of the FR32X/FR64X <-> GR32/GR64 bitcast moves defined earlier in this
// file (VMOVSS2DIZrr, VMOVDI2SSZrr, VMOVSDto64Zrr, VMOV64toSDZrr).

// 32-bit: f32 <-> v32i1.
4087def : Pat<(v32i1 (bitconvert FR32X:$src)),
4088          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4089def : Pat<(f32 (bitconvert VK32:$src)),
4090          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4091
// 64-bit: f64 <-> v64i1.
4092def : Pat<(v64i1 (bitconvert FR64X:$src)),
4093          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4094def : Pat<(f64 (bitconvert VK64:$src)),
4095          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4096
4097//===----------------------------------------------------------------------===//
4098// AVX-512  MOVSH, MOVSS, MOVSD
4099//===----------------------------------------------------------------------===//
4100
// Defines the register, load and store forms of one scalar move instruction.
//   asm         - mnemonic placed at the front of every asm string.
//   OpNode      - node matched by the register-to-register patterns.
//   vzload_frag - load fragment matched by the vector-typed `rm` form.
//   _           - X86VectorVTInfo supplying RC, FRC, KRCWM, VT, ScalarMemOp,
//                 ScalarLdFrag, ImmAllZerosV and ExeDomain.
//   prd         - predicate list for the unmasked `rr` form; defaults to
//                 [HasAVX512, OptForSize].
// Loads and register moves use opcode 0x10; stores use opcode 0x11.
4101multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4102                              X86VectorVTInfo _,
4103                              list<Predicate> prd = [HasAVX512, OptForSize]> {
  // Unmasked register form. The brace-less `let` attaches prd to this def
  // only; the defs that follow are not covered by it.
4104  let Predicates = prd in
4105  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4106             (ins _.RC:$src1, _.RC:$src2),
4107             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4108             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4109             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masked register form: X86selects between the OpNode result and an
  // all-zeros vector.
4110  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4111              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4112              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4113              "$dst {${mask}} {z}, $src1, $src2}"),
4114              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4115                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4116                                      _.ImmAllZerosV)))],
4117              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked register form: X86selects between the OpNode result and
  // $src0, which is tied to $dst.
4118  let Constraints = "$src0 = $dst"  in
4119  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4120             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4121             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4122             "$dst {${mask}}, $src1, $src2}"),
4123             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4124                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4125                                     (_.VT _.RC:$src0))))],
4126             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Unmasked loads, both flagged canFoldAsLoad and isReMaterializable.
4127  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  // Vector-typed result, matched through vzload_frag.
4128  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4129             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4130             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4131             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4132  // _alt version uses FR32/FR64 register class.
  // It writes _.FRC and is matched through _.ScalarLdFrag; isCodeGenOnly.
4133  let isCodeGenOnly = 1 in
4134  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4135                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4136                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4137                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4138  }
  // Masked loads. Neither has a pattern here, so mayLoad is set by hand.
4139  let mayLoad = 1, hasSideEffects = 0 in {
    // Merge-masked load; $src0 is tied to $dst.
4140    let Constraints = "$src0 = $dst" in
4141    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4142               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4143               !strconcat(asm, "\t{$src, $dst {${mask}}|",
4144               "$dst {${mask}}, $src}"),
4145               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    // Zero-masked load.
4146    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4147               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4148               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4149               "$dst {${mask}} {z}, $src}"),
4150               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4151  }
  // Unmasked store of a scalar held in _.FRC (opcode 0x11).
4152  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4153             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4154             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4155             EVEX, Sched<[WriteFStore]>;
  // Masked store. No pattern (mayStore set by hand); the mask operand is
  // written as VK1WM directly rather than _.KRCWM, the source register is
  // _.RC, and the def is marked NotMemoryFoldable.
4156  let mayStore = 1, hasSideEffects = 0 in
4157  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4158              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4159              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4160              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
4161              NotMemoryFoldable;
4162}
4163
// Instantiations of avx512_move_scalar for f32, f64 and f16. All three are
// VEX_LIG; they differ in prefix/map (XS, XD + VEX_W, T_MAP5XS) and in the
// EVEX_CD8 element size (32, 64, 16).

// f32: keeps the default prd list.
4164defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4165                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4166
// f64: keeps the default prd list.
4167defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4168                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4169
// f16: replaces the default prd list with [HasFP16].
4170defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4171                                  [HasFP16]>,
4172                                  VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
4173
// Patterns that fold a scalar X86selects feeding OpNode into the masked
// register forms <InstrStr>rrk / <InstrStr>rrkz.
//   InstrStr - instruction name prefix; "rrk"/"rrkz" is appended and the
//              result is looked up with !cast<Instruction>.
//   OpNode   - the move node wrapped around the select.
//   ZeroFP   - leaf matching the zero constant in the zero-masking pattern.
//   _        - X86VectorVTInfo for the vector type.
4174multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4175                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
4176
// Merge form: select(mask, src1, src2) inserted into src0 by OpNode.
// The rrk operands are emitted as: src2 copied into _.RC, the mask, src0,
// then src1 copied into _.RC.
4177def : Pat<(_.VT (OpNode _.RC:$src0,
4178                        (_.VT (scalar_to_vector
4179                                  (_.EltVT (X86selects VK1WM:$mask,
4180                                                       (_.EltVT _.FRC:$src1),
4181                                                       (_.EltVT _.FRC:$src2))))))),
4182          (!cast<Instruction>(InstrStr#rrk)
4183                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4184                        VK1WM:$mask,
4185                        (_.VT _.RC:$src0),
4186                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4187
// Zeroing form: the select's false value is ZeroFP, so rrkz is used and no
// pass-through operand is emitted.
4188def : Pat<(_.VT (OpNode _.RC:$src0,
4189                        (_.VT (scalar_to_vector
4190                                  (_.EltVT (X86selects VK1WM:$mask,
4191                                                       (_.EltVT _.FRC:$src1),
4192                                                       (_.EltVT ZeroFP))))))),
4193          (!cast<Instruction>(InstrStr#rrkz)
4194                        VK1WM:$mask,
4195                        (_.VT _.RC:$src0),
4196                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4197}
4198
// Selects a 512-bit masked_store whose data is a 128-bit value inserted at
// index 0 of undef, and whose mask matches the `Mask` dag, to <InstrStr>mrk.
//   InstrStr - instruction name prefix; "mrk" is appended.
//   _        - AVX512VLVectorVTInfo giving the 128- and 512-bit type info.
//   Mask     - dag the store mask must match; the instantiations in this
//              file bind $mask inside it.
//   MaskRC   - register class of $mask, which is copied into VK1WM.
4199multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4200                                        dag Mask, RegisterClass MaskRC> {
4201
4202def : Pat<(masked_store
4203             (_.info512.VT (insert_subvector undef,
4204                               (_.info128.VT _.info128.RC:$src),
4205                               (iPTR 0))), addr:$dst, Mask),
4206          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4207                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4208                      _.info128.RC:$src)>;
4209
4210}
4211
// Same source pattern as avx512_store_scalar_lowering, but $mask is first
// placed into an undefined i32 value at sub-register index `subreg`
// (INSERT_SUBREG into IMPLICIT_DEF), and that i32 is then copied into VK1WM.
//   subreg - sub-register index at which MaskRC:$mask is inserted.
4212multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4213                                               AVX512VLVectorVTInfo _,
4214                                               dag Mask, RegisterClass MaskRC,
4215                                               SubRegIndex subreg> {
4216
4217def : Pat<(masked_store
4218             (_.info512.VT (insert_subvector undef,
4219                               (_.info128.VT _.info128.RC:$src),
4220                               (iPTR 0))), addr:$dst, Mask),
4221          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4222                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4223                      _.info128.RC:$src)>;
4224
4225}
4226
4227// This matches the more recent codegen from clang that avoids emitting a 512
4228// bit masked store directly. Codegen will widen 128-bit masked store to 512
4229// bits on AVX512F only targets.
// Two patterns are produced, both selecting <InstrStr>mrk with the mask
// routed through INSERT_SUBREG/COPY_TO_REGCLASS into VK1WM:
//   Mask512 - mask dag for the widened 512-bit masked_store.
//   Mask128 - mask dag for the native 128-bit masked_store.
4230multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4231                                               AVX512VLVectorVTInfo _,
4232                                               dag Mask512, dag Mask128,
4233                                               RegisterClass MaskRC,
4234                                               SubRegIndex subreg> {
4235
4236// AVX512F pattern.
// The stored value is the 128-bit source inserted at index 0 of an undef
// 512-bit vector.
4237def : Pat<(masked_store
4238             (_.info512.VT (insert_subvector undef,
4239                               (_.info128.VT _.info128.RC:$src),
4240                               (iPTR 0))), addr:$dst, Mask512),
4241          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4242                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4243                      _.info128.RC:$src)>;
4244
4245// AVX512VL pattern.
// The 128-bit value is stored directly, with no widening.
4246def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4247          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4248                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4249                      _.info128.RC:$src)>;
4250}
4251
// Selects the low 128 bits (extract_subvector at index 0) of a 512-bit
// masked_load whose mask matches `Mask` to the masked scalar loads
// <InstrStr>rmkz / <InstrStr>rmk.
//   InstrStr - instruction name prefix; "rmkz"/"rmk" is appended.
//   _        - AVX512VLVectorVTInfo giving the 128- and 512-bit type info.
//   Mask     - dag the load mask must match; the instantiations in this
//              file bind $mask inside it.
//   MaskRC   - register class of $mask, which is copied into VK1WM.
4252multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4253                                       dag Mask, RegisterClass MaskRC> {
4254
// Pass-through is all zeros -> zero-masked load.
4255def : Pat<(_.info128.VT (extract_subvector
4256                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4257                                        _.info512.ImmAllZerosV)),
4258                           (iPTR 0))),
4259          (!cast<Instruction>(InstrStr#rmkz)
4260                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4261                      addr:$srcAddr)>;
4262
// Pass-through is (X86vzmovl src) inserted at index 0 of undef ->
// merge-masked load with src as the tied pass-through operand.
4263def : Pat<(_.info128.VT (extract_subvector
4264                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4265                      (_.info512.VT (insert_subvector undef,
4266                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4267                            (iPTR 0))))),
4268                (iPTR 0))),
4269          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4270                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4271                      addr:$srcAddr)>;
4272
4273}
4274
// Same two source patterns as avx512_load_scalar_lowering, but $mask is
// first placed into an undefined i32 value at sub-register index `subreg`
// (INSERT_SUBREG into IMPLICIT_DEF), and that i32 is then copied into VK1WM.
//   subreg - sub-register index at which MaskRC:$mask is inserted.
4275multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4276                                              AVX512VLVectorVTInfo _,
4277                                              dag Mask, RegisterClass MaskRC,
4278                                              SubRegIndex subreg> {
4279
// Pass-through is all zeros -> zero-masked load.
4280def : Pat<(_.info128.VT (extract_subvector
4281                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4282                                        _.info512.ImmAllZerosV)),
4283                           (iPTR 0))),
4284          (!cast<Instruction>(InstrStr#rmkz)
4285                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4286                      addr:$srcAddr)>;
4287
// Pass-through is (X86vzmovl src) inserted at index 0 of undef ->
// merge-masked load with src as the tied pass-through operand.
4288def : Pat<(_.info128.VT (extract_subvector
4289                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4290                      (_.info512.VT (insert_subvector undef,
4291                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4292                            (iPTR 0))))),
4293                (iPTR 0))),
4294          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4295                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4296                      addr:$srcAddr)>;
4297
4298}
4299
4300// This matches the more recent codegen from clang that avoids emitting a 512
4301// bit masked load directly. Codegen will widen 128-bit masked load to 512
4302// bits on AVX512F only targets.
// Four patterns are produced (zeroing and merging, for each of the two mask
// shapes), all routing $mask through INSERT_SUBREG/COPY_TO_REGCLASS into
// VK1WM:
//   Mask512 - mask dag for the widened 512-bit masked_load.
//   Mask128 - mask dag for the native 128-bit masked_load.
4303multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4304                                              AVX512VLVectorVTInfo _,
4305                                              dag Mask512, dag Mask128,
4306                                              RegisterClass MaskRC,
4307                                              SubRegIndex subreg> {
4308// AVX512F patterns.
// Low 128 bits of a 512-bit masked_load with an all-zeros pass-through.
4309def : Pat<(_.info128.VT (extract_subvector
4310                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4311                                        _.info512.ImmAllZerosV)),
4312                           (iPTR 0))),
4313          (!cast<Instruction>(InstrStr#rmkz)
4314                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4315                      addr:$srcAddr)>;
4316
// Low 128 bits of a 512-bit masked_load whose pass-through is
// (X86vzmovl src) inserted at index 0 of undef.
4317def : Pat<(_.info128.VT (extract_subvector
4318                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4319                      (_.info512.VT (insert_subvector undef,
4320                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4321                            (iPTR 0))))),
4322                (iPTR 0))),
4323          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4324                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4325                      addr:$srcAddr)>;
4326
4327// AVX512VL patterns.
// 128-bit masked_load with an all-zeros pass-through.
4328def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4329                         _.info128.ImmAllZerosV)),
4330          (!cast<Instruction>(InstrStr#rmkz)
4331                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4332                      addr:$srcAddr)>;
4333
// 128-bit masked_load whose pass-through is (X86vzmovl src).
4334def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4335                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4336          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4337                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4338                      addr:$srcAddr)>;
4339}
4340
// Masked-move folding patterns for the f16, f32 and f64 scalar moves, each
// paired with its own move node, zero leaf and 128-bit vector type info.
4341defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4342defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4343defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4344
// Masked scalar store lowering for masks computed in a GPR as
// (and $mask, 1) and bitcast to the 512-bit mask type: v32i1 for f16,
// v16i1 for f32, v8i1 for f64.
// NOTE(review): the two VMOVSHZ instantiations below match the identical
// mask dag and differ only in how $mask reaches VK1WM (direct copy vs.
// INSERT_SUBREG at sub_32bit) -- confirm both are intended.
4345defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4346                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4347defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4348                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
// f32 with the mask held in GR32 and truncated to i16 before the bitcast.
4349defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4350                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
// f32 with the mask already held in GR16.
4351defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4352                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
// f64 with the mask held in GR8.
4353defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4354                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4355
// Masked scalar store lowering for masks that start as (and GR8:$mask, 1)
// bitcast to v8i1. For each element type the first dag is the widened
// (Mask512) shape and the second is the native 128-bit (Mask128) shape.

// f16: the v8i1 mask is inserted at index 0 of an all-zeros v32i1.
4356defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4357                   (v32i1 (insert_subvector
4358                           (v32i1 immAllZerosV),
4359                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4360                           (iPTR 0))),
4361                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4362                   GR8, sub_8bit>;
// f32: the low v4i1 of the v8i1 mask, inserted at index 0 of an all-zeros
// v16i1.
4363defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4364                   (v16i1 (insert_subvector
4365                           (v16i1 immAllZerosV),
4366                           (v4i1 (extract_subvector
4367                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4368                                  (iPTR 0))),
4369                           (iPTR 0))),
4370                   (v4i1 (extract_subvector
4371                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4372                          (iPTR 0))), GR8, sub_8bit>;
// f64: the low v2i1 of the v8i1 mask is inserted at index 0 of an all-zeros
// v16i1, and the low v8i1 of that result is the 512-bit mask.
4373defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4374                   (v8i1
4375                    (extract_subvector
4376                     (v16i1
4377                      (insert_subvector
4378                       (v16i1 immAllZerosV),
4379                       (v2i1 (extract_subvector
4380                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4381                              (iPTR 0))),
4382                       (iPTR 0))),
4383                     (iPTR 0))),
4384                   (v2i1 (extract_subvector
4385                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4386                          (iPTR 0))), GR8, sub_8bit>;
4387
// Masked scalar load lowering for masks computed in a GPR as
// (and $mask, 1) and bitcast to the 512-bit mask type: v32i1 for f16,
// v16i1 for f32, v8i1 for f64.
// NOTE(review): the two VMOVSHZ instantiations below match the identical
// mask dag and differ only in how $mask reaches VK1WM (direct copy vs.
// INSERT_SUBREG at sub_32bit) -- confirm both are intended.
4388defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4389                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4390defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4391                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
// f32 with the mask held in GR32 and truncated to i16 before the bitcast.
4392defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4393                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
// f32 with the mask already held in GR16.
4394defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4395                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
// f64 with the mask held in GR8.
4396defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4397                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4398
// Masked scalar load lowering for masks that start as (and GR8:$mask, 1)
// bitcast to v8i1. For each element type the first dag is the widened
// (Mask512) shape and the second is the native 128-bit (Mask128) shape.

// f16: the v8i1 mask is inserted at index 0 of an all-zeros v32i1.
4399defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4400                   (v32i1 (insert_subvector
4401                           (v32i1 immAllZerosV),
4402                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4403                           (iPTR 0))),
4404                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4405                   GR8, sub_8bit>;
// f32: the low v4i1 of the v8i1 mask, inserted at index 0 of an all-zeros
// v16i1.
4406defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4407                   (v16i1 (insert_subvector
4408                           (v16i1 immAllZerosV),
4409                           (v4i1 (extract_subvector
4410                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4411                                  (iPTR 0))),
4412                           (iPTR 0))),
4413                   (v4i1 (extract_subvector
4414                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4415                          (iPTR 0))), GR8, sub_8bit>;
// f64: the low v2i1 of the v8i1 mask is inserted at index 0 of an all-zeros
// v16i1, and the low v8i1 of that result is the 512-bit mask.
4416defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4417                   (v8i1
4418                    (extract_subvector
4419                     (v16i1
4420                      (insert_subvector
4421                       (v16i1 immAllZerosV),
4422                       (v2i1 (extract_subvector
4423                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4424                              (iPTR 0))),
4425                       (iPTR 0))),
4426                     (iPTR 0))),
4427                   (v2i1 (extract_subvector
4428                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4429                          (iPTR 0))), GR8, sub_8bit>;
4430
// Scalar X86selects on f16/f32/f64 values, implemented with the masked
// vector moves. The scalar operands are copied into VR128X, the masked move
// is emitted, and the result is copied back to the scalar register class.
// In the register forms the second vector source is an IMPLICIT_DEF;
// presumably its contents do not matter because only the scalar is read
// back -- confirm.

// f16, both values in registers: merge-masked move with src2 as pass-through.
4431def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4432          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4433           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4434           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4435           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4436
// f16, false value is fp16imm0: zero-masked move.
4437def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4438          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4439           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4440
// f32, both values in registers.
4441def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4442          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4443           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4444           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4445           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4446
// f32, false value is fp32imm0.
4447def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4448          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4449           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4450
// f32, true value is a load: folded into the merge-masked load (src0 as
// pass-through) or, when the false value is fp32imm0, the zero-masked load.
4451def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4452          (COPY_TO_REGCLASS
4453           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4454                                                       VK1WM:$mask, addr:$src)),
4455           FR32X)>;
4456def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4457          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4458
// f64, both values in registers.
4459def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4460          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4461           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4462           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4463           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4464
// f64, false value is fp64imm0.
4465def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4466          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4467           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4468
// f64, true value is a load: same folding as the f32 load patterns.
4469def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4470          (COPY_TO_REGCLASS
4471           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4472                                                       VK1WM:$mask, addr:$src)),
4473           FR64X)>;
4474def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4475          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4476
4477
// X86selects applied directly to v4f32/v2f64 values. No register-class
// copies are needed; src1 is passed for both vector source operands of the
// masked move.

// Merge form: src2 is the tied pass-through operand.
4478def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4479          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4480def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4481          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4482
// Zeroing form: the false value is an all-zeros vector.
4483def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4484          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4485def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4486          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4487
// "_REV" register-to-register forms of vmovsh/vmovss/vmovsd. They use opcode
// 0x11 with MRMDestReg, whereas the rr/rrk/rrkz forms produced by
// avx512_move_scalar use opcode 0x10 with MRMSrcReg. None has a selection
// pattern; all are isCodeGenOnly with ForceDisassemble, and each names its
// 0x10 counterpart through FoldGenData. Every family has an unmasked, a
// merge-masked (k, with $src0 tied to $dst) and a zero-masked (kz) variant.
4488let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // f16 forms, additionally gated on HasFP16.
4489  let Predicates = [HasFP16] in {
4490    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4491        (ins VR128X:$src1, VR128X:$src2),
4492        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4493        []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4494        FoldGenData<"VMOVSHZrr">,
4495        Sched<[SchedWriteFShuffle.XMM]>;
4496
4497    let Constraints = "$src0 = $dst" in
4498    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4499        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4500         VR128X:$src1, VR128X:$src2),
4501        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4502          "$dst {${mask}}, $src1, $src2}",
4503        []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4504        FoldGenData<"VMOVSHZrrk">,
4505        Sched<[SchedWriteFShuffle.XMM]>;
4506
4507    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4508        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4509        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4510          "$dst {${mask}} {z}, $src1, $src2}",
4511        []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4512        FoldGenData<"VMOVSHZrrkz">,
4513        Sched<[SchedWriteFShuffle.XMM]>;
4514  }
  // f32 forms (XS prefix).
4515  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4516                           (ins VR128X:$src1, VR128X:$src2),
4517                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4518                           []>, XS, EVEX_4V, VEX_LIG,
4519                           FoldGenData<"VMOVSSZrr">,
4520                           Sched<[SchedWriteFShuffle.XMM]>;
4521
4522  let Constraints = "$src0 = $dst" in
4523  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4524                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4525                                                   VR128X:$src1, VR128X:$src2),
4526                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4527                                        "$dst {${mask}}, $src1, $src2}",
4528                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4529                             FoldGenData<"VMOVSSZrrk">,
4530                             Sched<[SchedWriteFShuffle.XMM]>;
4531
4532  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4533                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4534                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4535                                    "$dst {${mask}} {z}, $src1, $src2}",
4536                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4537                         FoldGenData<"VMOVSSZrrkz">,
4538                         Sched<[SchedWriteFShuffle.XMM]>;
4539
  // f64 forms (XD prefix, VEX_W set).
4540  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4541                           (ins VR128X:$src1, VR128X:$src2),
4542                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4543                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4544                           FoldGenData<"VMOVSDZrr">,
4545                           Sched<[SchedWriteFShuffle.XMM]>;
4546
4547  let Constraints = "$src0 = $dst" in
4548  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4549                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4550                                                   VR128X:$src1, VR128X:$src2),
4551                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4552                                        "$dst {${mask}}, $src1, $src2}",
4553                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4554                             VEX_W, FoldGenData<"VMOVSDZrrk">,
4555                             Sched<[SchedWriteFShuffle.XMM]>;
4556
4557  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4558                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4559                                                          VR128X:$src2),
4560                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4561                                         "$dst {${mask}} {z}, $src1, $src2}",
4562                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4563                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
4564                              Sched<[SchedWriteFShuffle.XMM]>;
4565}
4566
// Assembler aliases that map the ".s"-suffixed mnemonics (vmovsh.s, vmovss.s,
// vmovsd.s) onto the corresponding *_REV instruction definitions, in
// unmasked, merge-masked ({k}) and zero-masked ({k}{z}) flavours.
// The trailing 0 is the third InstAlias argument (the "Emit" flag in
// Target.td); with 0 the alias is accepted by the parser but is not used
// when printing.
// For the merge-masked *_REV forms the alias result lists only $dst, $mask,
// $src1 and $src2; the tied $src0 input of e.g. VMOVSDZrrk_REV is not spelled
// out here.
//
// vmovsh.s
4567def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4568                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4569def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4570                             "$dst {${mask}}, $src1, $src2}",
4571                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4572                                VR128X:$src1, VR128X:$src2), 0>;
4573def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4574                             "$dst {${mask}} {z}, $src1, $src2}",
4575                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4576                                 VR128X:$src1, VR128X:$src2), 0>;
// vmovss.s
4577def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4578                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4579def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4580                             "$dst {${mask}}, $src1, $src2}",
4581                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4582                                VR128X:$src1, VR128X:$src2), 0>;
4583def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4584                             "$dst {${mask}} {z}, $src1, $src2}",
4585                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4586                                 VR128X:$src1, VR128X:$src2), 0>;
// vmovsd.s
4587def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4588                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4589def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4590                             "$dst {${mask}}, $src1, $src2}",
4591                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4592                                VR128X:$src1, VR128X:$src2), 0>;
4593def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4594                             "$dst {${mask}} {z}, $src1, $src2}",
4595                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4596                                 VR128X:$src1, VR128X:$src2), 0>;
4597
// Size-optimized lowering of X86vzmovl for vectors with 32-bit elements
// (f32 and i32): emit VMOVSSZrr whose first source is an all-zeros register
// (AVX512_128_SET0) and whose second source is the input.
// For the 256-bit and 512-bit types the move is done on the low xmm
// subregister (EXTRACT_SUBREG ... sub_xmm) and the 128-bit result is placed
// back into the wide register with SUBREG_TO_REG.
4598let Predicates = [HasAVX512, OptForSize] in {
4599  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4600            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4601  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4602            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4603
4604  // Move low f32 and clear high bits.
4605  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4606            (SUBREG_TO_REG (i32 0),
4607             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4608              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4609  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4610            (SUBREG_TO_REG (i32 0),
4611             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4612              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4613
      // Same lowering for the 512-bit types.
4614  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4615            (SUBREG_TO_REG (i32 0),
4616             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4617              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4618  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4619            (SUBREG_TO_REG (i32 0),
4620             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4621              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4622}
4623
4624// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4625// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl forms are handled in this block. Each pattern
// blends the low xmm subregister of $src into an all-zeros register (V_SET0)
// and re-wraps the result with SUBREG_TO_REG.
// NOTE(review): the blend immediates differ (1 for VBLENDPSrri, 3 for
// VPBLENDWrri) -- presumably both select exactly the low 32 bits from $src
// (one f32 lane vs. two i16 lanes); confirm against the ISA reference.
4626let Predicates = [HasAVX512, OptForSpeed] in {
4627  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4628            (SUBREG_TO_REG (i32 0),
4629             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4630                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4631                          (i8 1))), sub_xmm)>;
4632  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4633            (SUBREG_TO_REG (i32 0),
4634             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4635                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4636                          (i8 3))), sub_xmm)>;
4637}
4638
// Scalar FP loads into vector registers, selected to the memory forms
// VMOVSSZrm (32-bit) and VMOVSDZrm (64-bit):
//  - scalar_to_vector of a loadf32/loadf64 for the 128-bit types;
//  - X86vzload32/X86vzload64 for the 256-bit and 512-bit FP types, where the
//    128-bit instruction result is placed in sub_xmm via SUBREG_TO_REG.
4639let Predicates = [HasAVX512] in {
4640  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4641            (VMOVSSZrm addr:$src)>;
4642  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4643            (VMOVSDZrm addr:$src)>;
4644
4645  // Represent the same patterns above but in the form they appear for
4646  // 256-bit types
4647  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4648            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4649  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4650            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4651
4652  // Represent the same patterns above but in the form they appear for
4653  // 512-bit types
4654  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4655            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4656  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4657            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4658}
// Half-precision (f16) counterparts of the patterns above, gated on HasFP16:
//  - X86vzmovl is lowered to VMOVSHZrr with an all-zeros first source
//    (AVX512_128_SET0); the 256/512-bit forms work on the low xmm subregister
//    and re-wrap the result with SUBREG_TO_REG.
//  - X86vzload16 is lowered to VMOVSHZrm, wrapped in SUBREG_TO_REG for the
//    256/512-bit result types.
// Unlike the 32-bit element patterns, these are not split by
// OptForSize/OptForSpeed.
4659let Predicates = [HasFP16] in {
4660  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4661            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4662
4663  // FIXME we need better canonicalization in dag combine
4664  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4665            (SUBREG_TO_REG (i32 0),
4666             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4667              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4668  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4669            (SUBREG_TO_REG (i32 0),
4670             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4671              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4672
4673  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4674            (VMOVSHZrm addr:$src)>;
4675
4676  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4677            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4678
4679  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4680            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4681}
4682
// EVEX-encoded register-to-register vmovq (opcode 0x7E, MRMSrcReg, VEX_W).
// Its selection pattern is X86vzmovl on a v2i64 value. Defined in the
// SSEPackedInt execution domain with the SchedWriteVecLogic.XMM scheduling
// class.
4683let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4684def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4685                                (ins VR128X:$src),
4686                                "vmovq\t{$src, $dst|$dst, $src}",
4687                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4688                                                   (v2i64 VR128X:$src))))]>,
4689                                EVEX, VEX_W;
4690}
4691
// Integer / 64-bit-element zero-extending move patterns:
//  - X86vzmovl of a scalar_to_vector from a GPR selects the GPR->XMM moves
//    VMOVDI2PDIZrr (GR32) and VMOV64toPQIZrr (GR64);
//  - X86vzload32/X86vzload64 select VMOVDI2PDIZrm / VMOVQI2PQIZrm, wrapped in
//    SUBREG_TO_REG for the 256/512-bit integer types;
//  - X86vzmovl on vectors with 64-bit elements (f64 and i64) selects
//    VMOVZPQILo2PQIZrr, applied to the low xmm subregister for the wide types.
// NOTE(review): the SUBREG_TO_REG immediate is written as (i64 0) for the
// X86vzload64 patterns and (i32 0) everywhere else in this block; the
// inconsistency is kept as-is here.
4692let Predicates = [HasAVX512] in {
4693  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4694            (VMOVDI2PDIZrr GR32:$src)>;
4695
4696  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4697            (VMOV64toPQIZrr GR64:$src)>;
4698
4699  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4700  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4701            (VMOVDI2PDIZrm addr:$src)>;
4702  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4703            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4704  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4705            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4706  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4707            (VMOVQI2PQIZrm addr:$src)>;
4708  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4709            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4710
4711  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4712  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4713            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4714  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4715            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4716
      // X86vzmovl on 256-bit vectors with 64-bit elements.
4717  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4718            (SUBREG_TO_REG (i32 0),
4719             (v2f64 (VMOVZPQILo2PQIZrr
4720                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4721             sub_xmm)>;
4722  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4723            (SUBREG_TO_REG (i32 0),
4724             (v2i64 (VMOVZPQILo2PQIZrr
4725                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4726             sub_xmm)>;
4727
      // X86vzmovl on 512-bit vectors with 64-bit elements.
4728  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4729            (SUBREG_TO_REG (i32 0),
4730             (v2f64 (VMOVZPQILo2PQIZrr
4731                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4732             sub_xmm)>;
4733  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4734            (SUBREG_TO_REG (i32 0),
4735             (v2i64 (VMOVZPQILo2PQIZrr
4736                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4737             sub_xmm)>;
4738}
4739
4740//===----------------------------------------------------------------------===//
4741// AVX-512 - Non-temporals
4742//===----------------------------------------------------------------------===//
4743
// 512-bit vmovntdqa load (opcode 0x2A, MRMSrcMem, i512mem source). The
// instruction itself carries no selection pattern ([]); the
// alignednontemporalload Pat<> records further down in this file select it
// for each 512-bit vector type.
4744def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4745                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4746                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4747                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4748
// 256-bit and 128-bit vmovntdqa loads, available only with HasVLX. They
// mirror the 512-bit definition (same opcode 0x2A, empty pattern list) and
// differ in register class, memory operand, vector-length prefix and the
// YMM/XMM scheduling class.
4749let Predicates = [HasVLX] in {
4750  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4751                       (ins i256mem:$src),
4752                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4753                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4754                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4755
4756  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4757                      (ins i128mem:$src),
4758                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4759                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4760                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4761}
4762
// Defines the memory-destination ("mr") form of one non-temporal store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand string is appended to it.
//   _         - vector type info; supplies MemOp, RC, VT, ExeDomain and the
//               EltSize used for EVEX_CD8.
//   Sched     - move/load/store scheduling info; only its MR member is used.
//   st_frag   - store fragment matched by the pattern (defaults to
//               alignednontemporalstore).
// AddedComplexity = 400 is applied to the definition -- presumably so this
// pattern is preferred over ordinary store patterns; confirm.
4763multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4764                        X86SchedWriteMoveLS Sched,
4765                        PatFrag st_frag = alignednontemporalstore> {
4766  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4767  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4768                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4769                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4770                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4771}
4772
// Instantiates avx512_movnt at all three vector lengths:
//   Z    (512-bit) under HasAVX512,
//   Z256 / Z128    under HasAVX512 + HasVLX.
// VTInfo supplies the per-width type info and Sched the per-width (ZMM/YMM/
// XMM) scheduling info. The st_frag argument is left at its default.
4773multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4774                           AVX512VLVectorVTInfo VTInfo,
4775                           X86SchedWriteMoveLSWidths Sched> {
4776  let Predicates = [HasAVX512] in
4777    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4778
4779  let Predicates = [HasAVX512, HasVLX] in {
4780    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4781    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4782  }
4783}
4784
// Non-temporal store instructions at all vector lengths:
//   vmovntdq (0xE7, i64 element info, PD prefix),
//   vmovntpd (0x2B, f64 element info, PD prefix + VEX_W),
//   vmovntps (0x2B, f32 element info, PS prefix).
// The generated records are named <defm-name><Z|Z256|Z128>mr, e.g.
// VMOVNTDQZmr, which the Pat<> records below refer to.
4785defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4786                                SchedWriteVecMoveLSNT>, PD;
4787defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4788                                SchedWriteFMoveLSNT>, PD, VEX_W;
4789defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4790                                SchedWriteFMoveLSNT>, PS;
4791
// Additional 512-bit non-temporal patterns (AddedComplexity = 400):
//  - aligned non-temporal stores of the v16i32 / v32i16 / v64i8 types are
//    routed to VMOVNTDQZmr (the VMOVNTDQ defm above is instantiated with the
//    i64 element info only);
//  - aligned non-temporal loads of every 512-bit vector type listed here,
//    FP and integer alike, select VMOVNTDQAZrm.
4792let Predicates = [HasAVX512], AddedComplexity = 400 in {
4793  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4794            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4795  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4796            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4797  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4798            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4799
4800  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4801            (VMOVNTDQAZrm addr:$src)>;
4802  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4803            (VMOVNTDQAZrm addr:$src)>;
4804  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4805            (VMOVNTDQAZrm addr:$src)>;
4806  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4807            (VMOVNTDQAZrm addr:$src)>;
4808  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4809            (VMOVNTDQAZrm addr:$src)>;
4810  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4811            (VMOVNTDQAZrm addr:$src)>;
4812}
4813
// 256-bit and 128-bit counterparts of the 512-bit non-temporal patterns,
// gated on HasVLX (AddedComplexity = 400):
//  - stores of the i32 / i16 / i8 element vector types go to
//    VMOVNTDQZ256mr / VMOVNTDQZ128mr;
//  - loads of every listed vector type go to VMOVNTDQAZ256rm /
//    VMOVNTDQAZ128rm.
4814let Predicates = [HasVLX], AddedComplexity = 400 in {
      // 256-bit stores.
4815  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4816            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4817  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4818            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4819  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4820            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4821
      // 256-bit loads.
4822  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4823            (VMOVNTDQAZ256rm addr:$src)>;
4824  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4825            (VMOVNTDQAZ256rm addr:$src)>;
4826  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4827            (VMOVNTDQAZ256rm addr:$src)>;
4828  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4829            (VMOVNTDQAZ256rm addr:$src)>;
4830  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4831            (VMOVNTDQAZ256rm addr:$src)>;
4832  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4833            (VMOVNTDQAZ256rm addr:$src)>;
4834
      // 128-bit stores.
4835  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4836            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4837  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4838            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4839  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4840            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4841
      // 128-bit loads.
4842  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4843            (VMOVNTDQAZ128rm addr:$src)>;
4844  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4845            (VMOVNTDQAZ128rm addr:$src)>;
4846  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4847            (VMOVNTDQAZ128rm addr:$src)>;
4848  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4849            (VMOVNTDQAZ128rm addr:$src)>;
4850  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4851            (VMOVNTDQAZ128rm addr:$src)>;
4852  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4853            (VMOVNTDQAZ128rm addr:$src)>;
4854}
4855
4856//===----------------------------------------------------------------------===//
4857// AVX-512 - Integer arithmetic
4858//===----------------------------------------------------------------------===//
// Maskable two-operand integer instruction in register-register (rr) and
// register-memory (rm) forms, both built on AVX512_maskable.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - DAG node matched by the patterns.
//   _            - vector type info (RC, VT, MemOp, LdFrag).
//   sched        - scheduling class; the rm form uses sched.Folded together
//                  with sched.ReadAfterFold.
//   IsCommutable - forwarded twice to AVX512_maskable for the rr form only
//                  (see AVX512_maskable for the meaning of its two trailing
//                  bit arguments); the rm form leaves them at their defaults.
4859multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4860                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4861                           bit IsCommutable = 0> {
4862  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4863                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4864                    "$src2, $src1", "$src1, $src2",
4865                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4866                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4867                    Sched<[sched]>;
4868
4869  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4870                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4871                  "$src2, $src1", "$src1, $src2",
4872                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4873                  AVX512BIBase, EVEX_4V,
4874                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4875}
4876
// Extends avx512_binop_rm (rr + rm) with an embedded-broadcast memory form
// (rmb): the second operand is a scalar memory operand loaded through
// _.BroadcastLdFrag, the assembly string appends _.BroadcastStr to $src2, and
// the encoding sets EVEX_B. Parameters are the same as avx512_binop_rm.
4877multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4878                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4879                            bit IsCommutable = 0> :
4880           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4881  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4882                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4883                  "${src2}"#_.BroadcastStr#", $src1",
4884                  "$src1, ${src2}"#_.BroadcastStr,
4885                  (_.VT (OpNode _.RC:$src1,
4886                                (_.BroadcastLdFrag addr:$src2)))>,
4887                  AVX512BIBase, EVEX_4V, EVEX_B,
4888                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4889}
4890
// Instantiates avx512_binop_rm (no broadcast form) at all vector lengths:
//   Z under [prd]; Z256 and Z128 under [prd, HasVLX].
//   VTInfo - per-width vector type info (info512/info256/info128).
//   sched  - per-width scheduling classes (ZMM/YMM/XMM).
//   prd    - feature predicate required by the instruction.
4891multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4892                              AVX512VLVectorVTInfo VTInfo,
4893                              X86SchedWriteWidths sched, Predicate prd,
4894                              bit IsCommutable = 0> {
4895  let Predicates = [prd] in
4896    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4897                             IsCommutable>, EVEX_V512;
4898
4899  let Predicates = [prd, HasVLX] in {
4900    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4901                                sched.YMM, IsCommutable>, EVEX_V256;
4902    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4903                                sched.XMM, IsCommutable>, EVEX_V128;
4904  }
4905}
4906
// Same as avx512_binop_rm_vl but built on avx512_binop_rmb, so every vector
// length also gets the embedded-broadcast (rmb) form.
//   Z under [prd]; Z256 and Z128 under [prd, HasVLX].
4907multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4908                               AVX512VLVectorVTInfo VTInfo,
4909                               X86SchedWriteWidths sched, Predicate prd,
4910                               bit IsCommutable = 0> {
4911  let Predicates = [prd] in
4912    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4913                             IsCommutable>, EVEX_V512;
4914
4915  let Predicates = [prd, HasVLX] in {
4916    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4917                                 sched.YMM, IsCommutable>, EVEX_V256;
4918    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4919                                 sched.XMM, IsCommutable>, EVEX_V128;
4920  }
4921}
4922
// 64-bit element (quadword) wrapper: avx512_binop_rmb_vl over
// avx512vl_i64_info, with VEX_W and EVEX_CD8<64, CD8VF>. Includes the
// broadcast form.
4923multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4924                                X86SchedWriteWidths sched, Predicate prd,
4925                                bit IsCommutable = 0> {
4926  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4927                                  sched, prd, IsCommutable>,
4928                                  VEX_W, EVEX_CD8<64, CD8VF>;
4929}
4930
// 32-bit element (doubleword) wrapper: avx512_binop_rmb_vl over
// avx512vl_i32_info with EVEX_CD8<32, CD8VF>. Includes the broadcast form;
// unlike the quadword wrapper it does not add VEX_W.
4931multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4932                                X86SchedWriteWidths sched, Predicate prd,
4933                                bit IsCommutable = 0> {
4934  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4935                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4936}
4937
// 16-bit element (word) wrapper: avx512_binop_rm_vl over avx512vl_i16_info
// with EVEX_CD8<16, CD8VF> and VEX_WIG. Uses the non-broadcast multiclass, so
// no rmb form is generated.
4938multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4939                                X86SchedWriteWidths sched, Predicate prd,
4940                                bit IsCommutable = 0> {
4941  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4942                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4943                                 VEX_WIG;
4944}
4945
// 8-bit element (byte) wrapper: avx512_binop_rm_vl over avx512vl_i8_info
// with EVEX_CD8<8, CD8VF> and VEX_WIG. Uses the non-broadcast multiclass, so
// no rmb form is generated.
4946multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4947                                X86SchedWriteWidths sched, Predicate prd,
4948                                bit IsCommutable = 0> {
4949  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4950                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4951                                 VEX_WIG;
4952}
4953
// Generates the doubleword and quadword variants of one operation.
// The "Q" records use opc_q and mnemonic OpcodeStr#"q"; the "D" records use
// opc_d and OpcodeStr#"d". Note the parameter order: opc_d comes first.
4954multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4955                                 SDNode OpNode, X86SchedWriteWidths sched,
4956                                 Predicate prd, bit IsCommutable = 0> {
4957  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4958                                   IsCommutable>;
4959
4960  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4961                                   IsCommutable>;
4962}
4963
// Generates the byte and word variants of one operation.
// The "W" records use opc_w and mnemonic OpcodeStr#"w"; the "B" records use
// opc_b and OpcodeStr#"b". Note the parameter order: opc_b comes first.
4964multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4965                                 SDNode OpNode, X86SchedWriteWidths sched,
4966                                 Predicate prd, bit IsCommutable = 0> {
4967  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4968                                   IsCommutable>;
4969
4970  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4971                                   IsCommutable>;
4972}
4973
// Generates all four element widths of one operation. The predicate is fixed
// per group instead of being a parameter: the D/Q variants require
// HasAVX512, the B/W variants require HasBWI.
// Opcode parameters are ordered b, w, d, q.
4974multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4975                                  bits<8> opc_d, bits<8> opc_q,
4976                                  string OpcodeStr, SDNode OpNode,
4977                                  X86SchedWriteWidths sched,
4978                                  bit IsCommutable = 0> {
4979  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4980                                    sched, HasAVX512, IsCommutable>,
4981              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4982                                    sched, HasBWI, IsCommutable>;
4983}
4984
// Maskable two-operand instruction whose source and destination vector types
// may differ. Produces rr, rm and rmb (embedded broadcast) forms.
//   _Src   - type info for both source operands (RC, VT, MemOp, LdFrag).
//   _Dst   - type info for the result; also the type AVX512_maskable is
//            instantiated with.
//   _Brdct - type info describing the broadcast memory operand
//            (ScalarMemOp, BroadcastStr, BroadcastLdFrag); the broadcast
//            value is bitconverted before being passed to OpNode.
//   IsCommutable - forwarded (once) to AVX512_maskable for the rr form only.
// Note that here `sched` precedes `OpNode` in the parameter list, unlike
// avx512_binop_rm.
4985multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4986                            X86FoldableSchedWrite sched,
4987                            SDNode OpNode,X86VectorVTInfo _Src,
4988                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4989                            bit IsCommutable = 0> {
4990  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4991                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4992                            "$src2, $src1","$src1, $src2",
4993                            (_Dst.VT (OpNode
4994                                         (_Src.VT _Src.RC:$src1),
4995                                         (_Src.VT _Src.RC:$src2))),
4996                            IsCommutable>,
4997                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
4998  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4999                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5000                        "$src2, $src1", "$src1, $src2",
5001                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5002                                      (_Src.LdFrag addr:$src2)))>,
5003                        AVX512BIBase, EVEX_4V,
5004                        Sched<[sched.Folded, sched.ReadAfterFold]>;
5005
5006  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5007                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5008                    OpcodeStr,
5009                    "${src2}"#_Brdct.BroadcastStr#", $src1",
5010                     "$src1, ${src2}"#_Brdct.BroadcastStr,
5011                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5012                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5013                    AVX512BIBase, EVEX_4V, EVEX_B,
5014                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5015}
5016
// Integer arithmetic instruction instantiations.
// Argument order depends on the multiclass used:
//   avx512_binop_rm_vl_all : opc_b, opc_w, opc_d, opc_q, mnemonic stem, node,
//                            sched, IsCommutable
//   avx512_binop_rm_vl_bw  : opc_b, opc_w, mnemonic stem, node, sched,
//                            predicate, IsCommutable
//   avx512_binop_rm_vl_{d,w,q} : opc, full mnemonic, node, sched, predicate,
//                            IsCommutable
// The final literal is IsCommutable: 1 for the add/multiply/average style
// operations below, 0 for the subtractions.
5017defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5018                                    SchedWriteVecALU, 1>;
5019defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5020                                    SchedWriteVecALU, 0>;
5021defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5022                                    SchedWriteVecALU, HasBWI, 1>;
5023defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5024                                    SchedWriteVecALU, HasBWI, 0>;
5025defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5026                                     SchedWriteVecALU, HasBWI, 1>;
5027defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5028                                     SchedWriteVecALU, HasBWI, 0>;
5029defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5030                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
5031defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5032                                    SchedWriteVecIMul, HasBWI, 1>;
5033defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5034                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
5035                                    NotEVEX2VEXConvertible;
5036defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5037                                    HasBWI, 1>;
5038defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5039                                     HasBWI, 1>;
5040defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5041                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
5042defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
5043                                   SchedWriteVecALU, HasBWI, 1>;
5044defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5045                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5046defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5047                                     SchedWriteVecIMul, HasAVX512, 1>;
5048
// Instantiates avx512_binop_rm2 at all vector lengths with independent source
// and destination type families.
//   _SrcVTInfo / _DstVTInfo - per-width source / destination type info.
//   prd - feature predicate; Z requires [prd], Z256/Z128 require
//         [HasVLX, prd].
// The broadcast operand type is hard-wired to 64-bit integer elements
// (v8i64_info / v4i64x_info / v2i64x_info), and every instantiation adds
// EVEX_CD8<64, CD8VF> and VEX_W.
5049multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5050                            X86SchedWriteWidths sched,
5051                            AVX512VLVectorVTInfo _SrcVTInfo,
5052                            AVX512VLVectorVTInfo _DstVTInfo,
5053                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
5054  let Predicates = [prd] in
5055    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5056                                 _SrcVTInfo.info512, _DstVTInfo.info512,
5057                                 v8i64_info, IsCommutable>,
5058                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
5059  let Predicates = [HasVLX, prd] in {
5060    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5061                                      _SrcVTInfo.info256, _DstVTInfo.info256,
5062                                      v4i64x_info, IsCommutable>,
5063                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
5064    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5065                                      _SrcVTInfo.info128, _DstVTInfo.info128,
5066                                      v2i64x_info, IsCommutable>,
5067                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
5068  }
5069}
5070
// vpmultishiftqb (opcode 0x83, T8PD): byte vectors for both source and
// destination, matched on X86multishift, requires HasVBMI, not commutable.
// Via avx512_binop_all its broadcast form uses 64-bit elements.
5071defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5072                                avx512vl_i8_info, avx512vl_i8_info,
5073                                X86multishift, HasVBMI, 0>, T8PD;
5074
// Embedded-broadcast (rmb) form for pack-style instructions whose result type
// (_Dst) differs from the source type (_Src). The broadcast operand uses the
// source type's ScalarMemOp / BroadcastStr / BroadcastLdFrag, and EVEX_CD8 is
// derived from _Src.EltSize. Only the rmb form is defined here; rr/rm come
// from avx512_packs_rm.
5075multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5076                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5077                            X86FoldableSchedWrite sched> {
5078  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5079                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5080                    OpcodeStr,
5081                    "${src2}"#_Src.BroadcastStr#", $src1",
5082                     "$src1, ${src2}"#_Src.BroadcastStr,
5083                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5084                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5085                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5086                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5087}
5088
// Register-register (rr) and register-memory (rm) forms for instructions
// whose result type (_Dst) differs from the source type (_Src).
// EVEX_CD8 is derived from _Src.EltSize. IsCommutable is forwarded twice to
// AVX512_maskable for the rr form only. Unlike avx512_binop_rm2, no opcode
// map / prefix class is applied here; the instantiating defm supplies it.
5089multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5090                            SDNode OpNode,X86VectorVTInfo _Src,
5091                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5092                            bit IsCommutable = 0> {
5093  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5094                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5095                            "$src2, $src1","$src1, $src2",
5096                            (_Dst.VT (OpNode
5097                                         (_Src.VT _Src.RC:$src1),
5098                                         (_Src.VT _Src.RC:$src2))),
5099                            IsCommutable, IsCommutable>,
5100                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5101  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5102                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5103                        "$src2, $src1", "$src1, $src2",
5104                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5105                                      (_Src.LdFrag addr:$src2)))>,
5106                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5107                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5108}
5109
// Pack instructions taking i32-element sources and producing i16-element
// results, at all vector lengths. Each width combines avx512_packs_rm
// (rr/rm) with avx512_packs_rmb (broadcast form) and uses the
// SchedWriteShuffle class of the matching width.
//   Z requires HasBWI; Z256/Z128 require HasBWI + HasVLX.
5110multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5111                                    SDNode OpNode> {
5112  let Predicates = [HasBWI] in
5113  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5114                                 v32i16_info, SchedWriteShuffle.ZMM>,
5115                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5116                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5117  let Predicates = [HasBWI, HasVLX] in {
5118    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5119                                     v16i16x_info, SchedWriteShuffle.YMM>,
5120                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5121                                      v16i16x_info, SchedWriteShuffle.YMM>,
5122                                      EVEX_V256;
5123    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5124                                     v8i16x_info, SchedWriteShuffle.XMM>,
5125                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5126                                      v8i16x_info, SchedWriteShuffle.XMM>,
5127                                      EVEX_V128;
5128  }
5129}
// Pack instructions taking i16-element sources and producing i8-element
// results, at all vector lengths. Only avx512_packs_rm is used, so there is
// no broadcast form, and every instantiation adds VEX_WIG.
//   Z requires HasBWI; Z256/Z128 require HasBWI + HasVLX.
5130multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5131                            SDNode OpNode> {
5132  let Predicates = [HasBWI] in
5133  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5134                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
5135  let Predicates = [HasBWI, HasVLX] in {
5136    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5137                                     v32i8x_info, SchedWriteShuffle.YMM>,
5138                                     EVEX_V256, VEX_WIG;
5139    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5140                                     v16i8x_info, SchedWriteShuffle.XMM>,
5141                                     EVEX_V128, VEX_WIG;
5142  }
5143}
5144
// Instantiates a multiply-add style instruction whose source and destination
// element types differ, at three vector widths (Z = 512, Z256, Z128).
//   _Src / _Dst  - per-width type info for the sources and the result; the
//                  info512/info256/info128 members select the width.
//   IsCommutable - forwarded unchanged to avx512_packs_rm (defaults to 0).
// Reuses avx512_packs_rm but with the SchedWriteVecIMul scheduling classes.
// The 512-bit form requires HasBWI; the 256/128-bit forms also require HasVLX.
5145multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5146                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
5147                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
5148  let Predicates = [HasBWI] in
5149  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5150                                _Dst.info512, SchedWriteVecIMul.ZMM,
5151                                IsCommutable>, EVEX_V512;
5152  let Predicates = [HasBWI, HasVLX] in {
5153    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5154                                     _Dst.info256, SchedWriteVecIMul.YMM,
5155                                     IsCommutable>, EVEX_V256;
5156    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5157                                     _Dst.info128, SchedWriteVecIMul.XMM,
5158                                     IsCommutable>, EVEX_V128;
5159  }
5160}
5161
// Pack instructions. The *DW forms use the i32->i16 multiclass and the *WB
// forms use the i16->i8 multiclass; the SS/US variants select on X86Packss /
// X86Packus respectively.
// NOTE: VPACKUSDW intentionally uses AVX5128IBase (not AVX512BIBase), unlike
// the other three definitions; this is not a typo.
5162defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5163defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5164defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5165defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5166
// Multiply-add instructions built on avx512_vpmadd.
// VPMADDUBSW: i8 sources -> i16 result, IsCommutable left at its default (0).
// VPMADDWD:   i16 sources -> i32 result, IsCommutable explicitly set to 1.
5167defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5168                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
5169defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5170                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
5171
// Integer min/max instructions, grouped as signed max, unsigned max, signed
// min, unsigned min. Within each group there is one definition per element
// size (b/w/d/q), selecting on the generic smax/umax/smin/umin nodes.
//  - Byte and word forms are gated on HasBWI; dword and qword forms on
//    HasAVX512.
//  - Every definition passes 1 as the final template argument (presumably the
//    commutable flag of avx512_binop_rm_vl_*; those multiclasses are defined
//    outside this chunk -- confirm there).
//  - The qword forms are additionally tagged NotEVEX2VEXConvertible.

// Signed maximum.
5172defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5173                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5174defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5175                                    SchedWriteVecALU, HasBWI, 1>;
5176defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5177                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5178defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5179                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5180                                    NotEVEX2VEXConvertible;
5181
// Unsigned maximum.
5182defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5183                                    SchedWriteVecALU, HasBWI, 1>;
5184defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5185                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5186defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5187                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5188defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5189                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5190                                    NotEVEX2VEXConvertible;
5191
// Signed minimum.
5192defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5193                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5194defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5195                                    SchedWriteVecALU, HasBWI, 1>;
5196defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5197                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5198defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5199                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5200                                    NotEVEX2VEXConvertible;
5201
// Unsigned minimum.
5202defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5203                                    SchedWriteVecALU, HasBWI, 1>;
5204defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5205                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5206defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5207                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5208defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5209                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5210                                    NotEVEX2VEXConvertible;
5211
5212// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Scheme used by every pattern below: each register operand is placed into
// the low part of an otherwise undefined v8i64 value (INSERT_SUBREG on an
// IMPLICIT_DEF), the 512-bit VPMULLQZ instruction is applied, and the result
// is narrowed again with EXTRACT_SUBREG. The upper elements are therefore
// computed from undefined inputs and discarded.
5213let Predicates = [HasDQI, NoVLX] in {
  // 256-bit (v4i64) multiply: register-register, then register-broadcast load.
5214  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5215            (EXTRACT_SUBREG
5216                (VPMULLQZrr
5217                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5218                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5219             sub_ymm)>;
5220  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5221            (EXTRACT_SUBREG
5222                (VPMULLQZrmb
5223                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5224                    addr:$src2),
5225             sub_ymm)>;
5226
  // 128-bit (v2i64) multiply: register-register, then register-broadcast load.
5227  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5228            (EXTRACT_SUBREG
5229                (VPMULLQZrr
5230                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5231                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5232             sub_xmm)>;
5233  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5234            (EXTRACT_SUBREG
5235                (VPMULLQZrmb
5236                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5237                    addr:$src2),
5238             sub_xmm)>;
5239}
5240
// Selection patterns that implement a v4i64 / v2i64 binary operation with a
// 512-bit instruction.
//   Instr  - base name of the 512-bit instruction; "rr" or "rmb" is appended
//            and the result is looked up with !cast<Instruction>.
//   OpNode - SelectionDAG node being matched.
// Operands are widened with INSERT_SUBREG into an IMPLICIT_DEF v8i64 and the
// result is narrowed with EXTRACT_SUBREG, so only the low sub_ymm / sub_xmm
// part of the 512-bit result is used. No predicate is set here; the
// instantiation site is expected to supply one.
5241multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit register-register form.
5242  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5243            (EXTRACT_SUBREG
5244                (!cast<Instruction>(Instr#"rr")
5245                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5246                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5247             sub_ymm)>;
  // 256-bit form with a 64-bit broadcast load as the second operand.
5248  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5249            (EXTRACT_SUBREG
5250                (!cast<Instruction>(Instr#"rmb")
5251                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5252                    addr:$src2),
5253             sub_ymm)>;
5254
  // 128-bit register-register form.
5255  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5256            (EXTRACT_SUBREG
5257                (!cast<Instruction>(Instr#"rr")
5258                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5259                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5260             sub_xmm)>;
  // 128-bit form with a 64-bit broadcast load as the second operand.
5261  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5262            (EXTRACT_SUBREG
5263                (!cast<Instruction>(Instr#"rmb")
5264                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5265                    addr:$src2),
5266             sub_xmm)>;
5267}
5268
// Without VLX, lower 128/256-bit 64-bit-element min/max through the 512-bit
// VPMAX/VPMIN[U|S]QZ instructions using avx512_min_max_lowering.
5269let Predicates = [HasAVX512, NoVLX] in {
5270  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5271  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5272  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5273  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5274}
5275
5276//===----------------------------------------------------------------------===//
5277// AVX-512  Logical Instructions
5278//===----------------------------------------------------------------------===//
5279
// Bitwise logic instructions. Each defm passes the same opcode twice to
// avx512_binop_rm_vl_dq (defined outside this chunk; presumably one opcode
// for the D form and one for the Q form -- confirm there).
// VPAND/VPOR/VPXOR pass a trailing 1; VPANDN omits it, which is consistent
// with and-not not being a commutative operation.
5280defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5281                                   SchedWriteVecLogic, HasAVX512, 1>;
5282defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5283                                  SchedWriteVecLogic, HasAVX512, 1>;
5284defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5285                                   SchedWriteVecLogic, HasAVX512, 1>;
5286defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5287                                    SchedWriteVecLogic, HasAVX512>;
5288
// Unmasked and/or/xor/andnp on byte and word element vectors at 128 and 256
// bits. All of these are selected to the Q-suffixed (VP*QZ128 / VP*QZ256)
// instructions; the operation is bitwise, so the instruction's element size
// does not affect the unmasked result.
5289let Predicates = [HasVLX] in {
  // 128-bit register-register forms.
5290  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5291            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5292  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5293            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5294
5295  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5296            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5297  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5298            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5299
5300  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5301            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5302  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5303            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5304
5305  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5306            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5307  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5308            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5309
  // 128-bit register-memory forms (second operand is a full vector load).
5310  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5311            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5312  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5313            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5314
5315  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5316            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5317  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5318            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5319
5320  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5321            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5322  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5323            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5324
5325  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5326            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5327  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5328            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5329
  // 256-bit register-register forms.
5330  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5331            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5332  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5333            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5334
5335  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5336            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5337  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5338            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5339
5340  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5341            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5342  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5343            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5344
5345  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5346            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5347  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5348            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5349
  // 256-bit register-memory forms (second operand is a full vector load).
5350  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5351            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5352  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5353            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5354
5355  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5356            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5357  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5358            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5359
5360  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5361            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5362  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5363            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5364
5365  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5366            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5367  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5368            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5369}
5370
// 512-bit counterpart of the byte/word logic patterns: unmasked
// and/or/xor/andnp on v64i8 and v32i16 are selected to the Q-suffixed
// VP*QZ instructions. Only HasAVX512 is required.
5371let Predicates = [HasAVX512] in {
  // Register-register forms.
5372  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5373            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5374  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5375            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5376
5377  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5378            (VPORQZrr VR512:$src1, VR512:$src2)>;
5379  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5380            (VPORQZrr VR512:$src1, VR512:$src2)>;
5381
5382  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5383            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5384  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5385            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5386
5387  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5388            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5389  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5390            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5391
  // Register-memory forms (second operand is a full vector load).
5392  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5393            (VPANDQZrm VR512:$src1, addr:$src2)>;
5394  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5395            (VPANDQZrm VR512:$src1, addr:$src2)>;
5396
5397  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5398            (VPORQZrm VR512:$src1, addr:$src2)>;
5399  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5400            (VPORQZrm VR512:$src1, addr:$src2)>;
5401
5402  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5403            (VPXORQZrm VR512:$src1, addr:$src2)>;
5404  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5405            (VPXORQZrm VR512:$src1, addr:$src2)>;
5406
5407  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5408            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5409  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5410            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5411}
5412
5413// Patterns to catch vselect with different type than logic op.
//   InstrStr - base instruction name; rrk/rrkz/rmk/rmkz is appended and the
//              result is looked up with !cast<Instruction>.
//   OpNode   - the logic SelectionDAG node.
//   _        - type info of the vselect (mask class, result VT, register
//              class all come from here).
//   IntInfo  - type info in which the logic op itself was performed; its
//              result is bitconverted to _.VT before the select.
// The mask granularity is taken from `_`, so the chosen instruction must have
// the element size of the select type, not of the logic op.
5414multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5415                                    X86VectorVTInfo _,
5416                                    X86VectorVTInfo IntInfo> {
5417  // Masked register-register logical operations.
  // Merge-masking: unselected elements come from $src0.
5418  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5419                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5420                   _.RC:$src0)),
5421            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5422             _.RC:$src1, _.RC:$src2)>;
5423
  // Zero-masking: unselected elements are zero (_.ImmAllZerosV).
5424  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5425                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5426                   _.ImmAllZerosV)),
5427            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5428             _.RC:$src2)>;
5429
5430  // Masked register-memory logical operations.
  // Merge-masking with a loaded second operand.
5431  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5432                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5433                                            (load addr:$src2)))),
5434                   _.RC:$src0)),
5435            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5436             _.RC:$src1, addr:$src2)>;
  // Zero-masking with a loaded second operand.
5437  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5438                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5439                                            (load addr:$src2)))),
5440                   _.ImmAllZerosV)),
5441            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5442             addr:$src2)>;
5443}
5444
// Broadcast-memory counterpart of avx512_logical_lowering: matches a
// vselect_mask of type _.VT whose true value is a bitconverted logic op
// performed in IntInfo.VT with IntInfo.BroadcastLdFrag as the second operand.
// Selects the rmbk (merge-masked, passthrough $src0) and rmbkz (zero-masked)
// instruction forms of InstrStr.
5445multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5446                                         X86VectorVTInfo _,
5447                                         X86VectorVTInfo IntInfo> {
5448  // Register-broadcast logical operations.
5449  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5450                   (bitconvert
5451                    (IntInfo.VT (OpNode _.RC:$src1,
5452                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5453                   _.RC:$src0)),
5454            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5455             _.RC:$src1, addr:$src2)>;
5456  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5457                   (bitconvert
5458                    (IntInfo.VT (OpNode _.RC:$src1,
5459                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5460                   _.ImmAllZerosV)),
5461            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5462             _.RC:$src1, addr:$src2)>;
5463}
5464
// Expands avx512_logical_lowering over the three vector widths of a
// (SelectInfo, IntInfo) pair. "Z128"/"Z256"/"Z" is appended to InstrStr.
// The 128- and 256-bit patterns require HasVLX; the 512-bit ones HasAVX512.
5465multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5466                                         AVX512VLVectorVTInfo SelectInfo,
5467                                         AVX512VLVectorVTInfo IntInfo> {
5468let Predicates = [HasVLX] in {
5469  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5470                                 IntInfo.info128>;
5471  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5472                                 IntInfo.info256>;
5473}
5474let Predicates = [HasAVX512] in {
5475  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5476                                 IntInfo.info512>;
5477}
5478}
5479
// Expands avx512_logical_lowering_bcast over the three vector widths of a
// (SelectInfo, IntInfo) pair. "Z128"/"Z256"/"Z" is appended to InstrStr.
// The 128- and 256-bit patterns require HasVLX; the 512-bit ones HasAVX512.
5480multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5481                                               AVX512VLVectorVTInfo SelectInfo,
5482                                               AVX512VLVectorVTInfo IntInfo> {
5483let Predicates = [HasVLX] in {
5484  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5485                                       SelectInfo.info128, IntInfo.info128>;
5486  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5487                                       SelectInfo.info256, IntInfo.info256>;
5488}
5489let Predicates = [HasAVX512] in {
5490  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5491                                       SelectInfo.info512, IntInfo.info512>;
5492}
5493}
5494
// Enumerates every (select type, logic-op type) combination for which masked
// logic patterns are generated. The instruction suffix follows the element
// size of the select type: "Q" for i64/f64 selects, "D" for i32/f32 selects.
// Integer selects skip the combination where both types are equal; the FP
// selects list all four integer logic types.
5495multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5496  // i64 vselect with i32/i16/i8 logic op
5497  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5498                                       avx512vl_i32_info>;
5499  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5500                                       avx512vl_i16_info>;
5501  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5502                                       avx512vl_i8_info>;
5503
5504  // i32 vselect with i64/i16/i8 logic op
5505  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5506                                       avx512vl_i64_info>;
5507  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5508                                       avx512vl_i16_info>;
5509  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5510                                       avx512vl_i8_info>;
5511
5512  // f32 vselect with i64/i32/i16/i8 logic op
5513  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5514                                       avx512vl_i64_info>;
5515  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5516                                       avx512vl_i32_info>;
5517  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5518                                       avx512vl_i16_info>;
5519  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5520                                       avx512vl_i8_info>;
5521
5522  // f64 vselect with i64/i32/i16/i8 logic op
5523  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5524                                       avx512vl_i64_info>;
5525  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5526                                       avx512vl_i32_info>;
5527  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5528                                       avx512vl_i16_info>;
5529  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5530                                       avx512vl_i8_info>;
5531
  // Broadcast-load patterns: only FP selects paired with the integer type of
  // the same element width (f32 with i32, f64 with i64).
5532  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5533                                             avx512vl_f32_info,
5534                                             avx512vl_i32_info>;
5535  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5536                                             avx512vl_f64_info,
5537                                             avx512vl_i64_info>;
5538}
5539
// Generate the mixed-type masked lowering patterns for each of the four
// logic instructions defined above.
5540defm : avx512_logical_lowering_types<"VPAND", and>;
5541defm : avx512_logical_lowering_types<"VPOR",  or>;
5542defm : avx512_logical_lowering_types<"VPXOR", xor>;
5543defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5544
5545//===----------------------------------------------------------------------===//
5546// AVX-512  FP arithmetic
5547//===----------------------------------------------------------------------===//
5548
// Scalar FP binary operation: defines four instruction forms.
//   rr_Int / rm_Int - maskable forms operating on the vector register class
//                     (_.RC), selected through VecNode.
//   rr / rm         - isCodeGenOnly forms operating on the scalar FP register
//                     class (_.FRC), selected through OpNode.
// IsCommutable is applied to the rr form only.
// All four forms are marked as using MXCSR and as possibly raising an FP
// exception.
5549multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5550                            SDPatternOperator OpNode, SDNode VecNode,
5551                            X86FoldableSchedWrite sched, bit IsCommutable> {
5552  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5553  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5554                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5555                           "$src2, $src1", "$src1, $src2",
5556                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5557                           Sched<[sched]>;
5558
5559  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5560                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5561                         "$src2, $src1", "$src1, $src2",
5562                         (_.VT (VecNode _.RC:$src1,
5563                                        (_.ScalarIntMemFrags addr:$src2)))>,
5564                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Scalar-register forms used only by instruction selection; they share the
  // opcode and mnemonic of the _Int forms above.
5565  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5566  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5567                         (ins _.FRC:$src1, _.FRC:$src2),
5568                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5569                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5570                          Sched<[sched]> {
5571    let isCommutable = IsCommutable;
5572  }
5573  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5574                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5575                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5576                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5577                         (_.ScalarLdFrag addr:$src2)))]>,
5578                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5579  }
5580  }
5581}
5582
// Scalar FP binary operation with an explicit rounding-control operand.
// Defines a single maskable register-register form (rrb_Int) that takes an
// extra AVX512RC:$rc operand, passed to VecNode as an i32 target immediate,
// and is tagged EVEX_B and EVEX_RC.
// Only Uses = [MXCSR] is set in this multiclass; mayRaiseFPException is not
// set here.
5583multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5584                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5585  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5586  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5587                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5588                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5589                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5590                          (i32 timm:$rc))>,
5591                          EVEX_B, EVEX_RC, Sched<[sched]>;
5592}
// Scalar FP binary operation with an additional {sae} form. Defines five
// instruction forms:
//   rr_Int / rm_Int - maskable vector-register forms selected via VecNode,
//                     tagged SIMD_EXC.
//   rr / rm         - isCodeGenOnly scalar-register (_.FRC) forms selected
//                     via OpNode; each carries an EVEX2VEXOverride built from
//                     EVEX2VexOvrd plus "rr"/"rm".
//   rrb_Int         - maskable register form printed with "{sae}", selected
//                     via SaeNode and tagged EVEX_B.
// IsCommutable is applied to the rr form only.
5593multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5594                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5595                                X86FoldableSchedWrite sched, bit IsCommutable,
5596                                string EVEX2VexOvrd> {
5597  let ExeDomain = _.ExeDomain in {
5598  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5599                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5600                           "$src2, $src1", "$src1, $src2",
5601                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5602                           Sched<[sched]>, SIMD_EXC;
5603
5604  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5605                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5606                         "$src2, $src1", "$src1, $src2",
5607                         (_.VT (VecNode _.RC:$src1,
5608                                        (_.ScalarIntMemFrags addr:$src2)))>,
5609                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5610
  // Scalar-register forms used only by instruction selection.
5611  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5612      Uses = [MXCSR], mayRaiseFPException = 1 in {
5613  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5614                         (ins _.FRC:$src1, _.FRC:$src2),
5615                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5616                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5617                          Sched<[sched]>,
5618                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5619    let isCommutable = IsCommutable;
5620  }
5621  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5622                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5623                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5624                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5625                         (_.ScalarLdFrag addr:$src2)))]>,
5626                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5627                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5628  }
5629
  // {sae} form: only Uses = [MXCSR] is set here (mayRaiseFPException is not).
5630  let Uses = [MXCSR] in
5631  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5632                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5633                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5634                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5635                            EVEX_B, Sched<[sched]>;
5636  }
5637}
5638
// Instantiates a scalar FP binary operation with rounding-control support for
// three element types: SSZ (f32, "ss"), SDZ (f64, "sd") and SHZ (f16, "sh").
// Each combines avx512_fp_scalar (OpNode/VecNode forms) with
// avx512_fp_scalar_round (RndNode form). Scheduling info comes from the
// PS/PD/PH scalar entries of `sched`.
// Only the f16 variant is wrapped in an explicit predicate (HasFP16) here.
5639multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5640                                SDNode VecNode, SDNode RndNode,
5641                                X86SchedWriteSizes sched, bit IsCommutable> {
5642  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5643                              sched.PS.Scl, IsCommutable>,
5644             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5645                              sched.PS.Scl>,
5646                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5647  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5648                              sched.PD.Scl, IsCommutable>,
5649             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5650                              sched.PD.Scl>,
5651                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5652  let Predicates = [HasFP16] in
5653    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5654                                VecNode, sched.PH.Scl, IsCommutable>,
5655               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5656                                sched.PH.Scl>,
5657                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5658}
5659
// Instantiates a scalar FP binary operation with {sae} support for three
// element types: SSZ (f32), SDZ (f64) and SHZ (f16), all through
// avx512_fp_scalar_sae. The EVEX2VEX override name is built from NAME plus
// "SS"/"SD"/"SH".
// The f16 variant is gated on HasFP16 and additionally tagged
// NotEVEX2VEXConvertible.
5660multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5661                              SDNode VecNode, SDNode SaeNode,
5662                              X86SchedWriteSizes sched, bit IsCommutable> {
5663  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5664                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5665                              NAME#"SS">,
5666                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5667  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5668                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5669                              NAME#"SD">,
5670                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5671  let Predicates = [HasFP16] in {
5672    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5673                                VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5674                                NAME#"SH">,
5675                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5676                                NotEVEX2VEXConvertible;
5677  }
5678}
// Scalar FP arithmetic instructions.
// VADD/VMUL/VSUB/VDIV use the rounding-control multiclass; VADD and VMUL are
// marked commutable (final argument 1), VSUB and VDIV are not.
// VMIN/VMAX use the {sae} multiclass and are not marked commutable here.
5679defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5680                                 SchedWriteFAddSizes, 1>;
5681defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5682                                 SchedWriteFMulSizes, 1>;
5683defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5684                                 SchedWriteFAddSizes, 0>;
5685defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5686                                 SchedWriteFDivSizes, 0>;
5687defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5688                               SchedWriteFCmpSizes, 0>;
5689defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5690                               SchedWriteFCmpSizes, 0>;
5691
5692// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5693// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Defines isCodeGenOnly scalar-register forms only:
//   rr - register-register, always marked isCommutable = 1.
//   rm - register-memory, loading the second operand with _.ScalarLdFrag.
// Each form carries an EVEX2VEXOverride built from EVEX2VEXOvrd plus
// "rr"/"rm". No maskable (_Int) forms are defined by this multiclass.
5694multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5695                                    X86VectorVTInfo _, SDNode OpNode,
5696                                    X86FoldableSchedWrite sched,
5697                                    string EVEX2VEXOvrd> {
5698  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5699  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5700                         (ins _.FRC:$src1, _.FRC:$src2),
5701                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5702                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5703                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5704    let isCommutable = 1;
5705  }
5706  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5707                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5708                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5709                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5710                         (_.ScalarLdFrag addr:$src2)))]>,
5711                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5712                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5713  }
5714}
// Commutable scalar min/max variants selected on X86fminc / X86fmaxc. They
// reuse the opcodes (0x5D / 0x5F) and mnemonics of the regular vmin*/vmax*
// instructions and are all tagged SIMD_EXC.
// The f16 (SH) variants are additionally tagged NotEVEX2VEXConvertible.
// NOTE(review): the SH variants are not wrapped in a HasFP16 predicate here;
// the multiclass itself only sets Predicates = [HasAVX512] -- confirm this is
// intended.
5715defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5716                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5717                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5718
5719defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5720                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5721                                         VEX_W, EVEX_4V, VEX_LIG,
5722                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5723
5724defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5725                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5726                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5727
5728defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5729                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5730                                         VEX_W, EVEX_4V, VEX_LIG,
5731                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5732
5733defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5734                                         SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5735                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5736                                         NotEVEX2VEXConvertible;
5737defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5738                                         SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5739                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5740                                         NotEVEX2VEXConvertible;
5741
// Packed FP binary operation for a single vector type `_`. Emits three
// AVX512_maskable_split families, all tagged EVEX_4V:
//   rr  - both sources in registers.
//   rm  - $src2 is a full-width memory operand loaded via _.LdFrag.
//   rmb - $src2 is a scalar memory operand loaded via _.BroadcastLdFrag;
//         tagged EVEX_B and printed with _.BroadcastStr.
// Parameters:
//   OpNode / MaskOpNode - the two pattern operators handed to
//                         AVX512_maskable_split (presumably unmasked vs.
//                         masked patterns -- that class is outside this view).
//   IsCommutable        - forwarded for rr only.
//   IsKCommutable       - forwarded twice for rr only; defaults to IsCommutable.
//   suffix              - appended to OpcodeStr; defaults to _.Suffix.
//   ClobberConstraint   - forwarded unchanged to every form.
//   MayRaiseFPException - value assigned to mayRaiseFPException (default 1).
// Every form is declared with Uses = [MXCSR] and hasSideEffects = 0; the
// memory forms additionally set mayLoad = 1.
5742multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5743                            SDPatternOperator MaskOpNode,
5744                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5745                            bit IsCommutable,
5746                            bit IsKCommutable = IsCommutable,
5747                            string suffix = _.Suffix,
5748                            string ClobberConstraint = "",
5749                            bit MayRaiseFPException = 1> {
5750  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5751      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5752  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5753                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5754                                 "$src2, $src1", "$src1, $src2",
5755                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5756                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5757                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
5758  let mayLoad = 1 in {
5759    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5760                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5761                                   "$src2, $src1", "$src1, $src2",
5762                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5763                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5764                                   ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5765    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5766                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5767                                    "${src2}"#_.BroadcastStr#", $src1",
5768                                    "$src1, ${src2}"#_.BroadcastStr,
5769                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5770                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5771                                    ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5772    }
5773  }
5774}
5775
// Register-only packed FP form that takes an explicit rounding-control
// operand. Emits a single `rrb` AVX512_maskable family whose inputs are
// $src1, $src2 and an AVX512RC:$rc operand; the pattern passes $rc to
// OpNodeRnd as an i32 target immediate. The result is tagged EVEX_4V,
// EVEX_B and EVEX_RC and declared with Uses = [MXCSR].
// `suffix` (default _.Suffix) is appended to OpcodeStr; ClobberConstraint
// is forwarded together with vselect_mask after three literal 0 flags.
5776multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5777                                  SDPatternOperator OpNodeRnd,
5778                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5779                                  string suffix = _.Suffix,
5780                                  string ClobberConstraint = ""> {
5781  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5782  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5783                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5784                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5785                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5786                  0, 0, 0, vselect_mask, ClobberConstraint>,
5787                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5788}
5789
// Register-only packed FP form printed with a literal "{sae}" operand.
// Emits a single `rrb` AVX512_maskable family matching
// (OpNodeSAE $src1, $src2); unlike avx512_fp_round_packed there is no
// rounding-control operand and no EVEX_RC tag. Tagged EVEX_4V and EVEX_B,
// declared with Uses = [MXCSR]. The mnemonic is OpcodeStr # _.Suffix.
5790multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5791                                SDPatternOperator OpNodeSAE,
5792                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5793  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5794  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5795                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5796                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5797                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5798                  EVEX_4V, EVEX_B, Sched<[sched]>;
5799}
5800
// Instantiates avx512_fp_packed for every f32/f64 vector width:
//   PSZ / PDZ       - 512-bit v16f32 / v8f64, guarded by [prd].
//   PSZ128 / PSZ256 - 128/256-bit f32, guarded by [prd, HasVLX].
//   PDZ128 / PDZ256 - 128/256-bit f64, guarded by [prd, HasVLX].
// f64 variants carry VEX_W and EVEX_CD8<64, CD8VF>; f32 variants use
// EVEX_CD8<32, CD8VF>.
// Parameters:
//   prd               - base feature predicate.
//   sched             - X86SchedWriteSizes; indexed as sched.PS/PD.{XMM,YMM,ZMM}.
//   IsCommutable      - commutability flag forwarded to avx512_fp_packed.
//   IsPD128Commutable - used only for PDZ128, where it is passed as
//                       IsCommutable while the regular IsCommutable value is
//                       passed as IsKCommutable. Defaults to IsCommutable.
5801multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5802                             SDPatternOperator MaskOpNode,
5803                             Predicate prd, X86SchedWriteSizes sched,
5804                             bit IsCommutable = 0,
5805                             bit IsPD128Commutable = IsCommutable> {
5806  let Predicates = [prd] in {
5807  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5808                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5809                              EVEX_CD8<32, CD8VF>;
5810  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5811                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5812                              EVEX_CD8<64, CD8VF>;
5813  }
5814
5815    // Define only if AVX512VL feature is present.
5816  let Predicates = [prd, HasVLX] in {
5817    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5818                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5819                                   EVEX_CD8<32, CD8VF>;
5820    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5821                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5822                                   EVEX_CD8<32, CD8VF>;
5823    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5824                                   sched.PD.XMM, IsPD128Commutable,
5825                                   IsCommutable>, EVEX_V128, PD, VEX_W,
5826                                   EVEX_CD8<64, CD8VF>;
5827    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5828                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5829                                   EVEX_CD8<64, CD8VF>;
5830  }
5831}
5832
// Half-precision (f16) counterpart of avx512_fp_binop_p. Instantiates
// avx512_fp_packed for v32f16 (PHZ, [HasFP16]) and for v8f16 / v16f16
// (PHZ128 / PHZ256, [HasVLX, HasFP16]). All variants use the T_MAP5PS
// opcode-map/prefix class, EVEX_CD8<16, CD8VF> and the sched.PH.* entries.
5833multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5834                              SDPatternOperator MaskOpNode,
5835                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5836  let Predicates = [HasFP16] in {
5837    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5838                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5839                                EVEX_CD8<16, CD8VF>;
5840  }
5841  let Predicates = [HasVLX, HasFP16] in {
5842    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5843                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5844                                   EVEX_CD8<16, CD8VF>;
5845    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5846                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5847                                   EVEX_CD8<16, CD8VF>;
5848  }
5849}
5850
// Explicit-rounding (rrb) forms of a packed FP binary op, 512-bit only:
// PHZ (v32f16, guarded by HasFP16), PSZ (v16f32) and PDZ (v8f64). Each is an
// avx512_fp_round_packed instantiation using OpNodeRnd; no 128/256-bit
// variants are defined here. PSZ/PDZ set no Predicates in this multiclass.
5851let Uses = [MXCSR] in
5852multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5853                                   X86SchedWriteSizes sched> {
5854  let Predicates = [HasFP16] in {
5855    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5856                                      v32f16_info>,
5857                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5858  }
5859  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5860                                    v16f32_info>,
5861                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5862  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5863                                    v8f64_info>,
5864                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5865}
5866
// "{sae}" (rrb) forms of a packed FP binary op, 512-bit only: PHZ (v32f16,
// guarded by HasFP16), PSZ (v16f32) and PDZ (v8f64). Each is an
// avx512_fp_sae_packed instantiation. Note the SDNode parameter is named
// OpNodeRnd here even though it is forwarded as the SAE node.
5867let Uses = [MXCSR] in
5868multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5869                                 X86SchedWriteSizes sched> {
5870  let Predicates = [HasFP16] in {
5871    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5872                                    v32f16_info>,
5873                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5874  }
5875  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5876                                  v16f32_info>,
5877                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5878  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5879                                  v8f64_info>,
5880                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5881}
5882
// Packed FP arithmetic. Each defm combines the f32/f64 forms
// (avx512_fp_binop_p), the f16 forms (avx512_fp_binop_ph) and a 512-bit
// register-only form: explicit rounding for add/mul/sub/div, {sae} for
// min/max.
//   VADD 0x58, VMUL 0x59 - instantiated with IsCommutable = 1.
//   VSUB 0x5C, VDIV 0x5E - IsCommutable left at its default of 0.
//   VMIN 0x5D, VMAX 0x5F - IsCommutable = 0, using X86fmin / X86fmax.
// VMINC / VMAXC reuse the min/max opcodes and mnemonics with the X86fminc /
// X86fmaxc nodes and IsCommutable = 1; they are isCodeGenOnly and have no
// {sae} form.
5883defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5884                              SchedWriteFAddSizes, 1>,
5885            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5886            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5887defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5888                              SchedWriteFMulSizes, 1>,
5889            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5890            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5891defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5892                              SchedWriteFAddSizes>,
5893            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5894            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5895defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5896                              SchedWriteFDivSizes>,
5897            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5898            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5899defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5900                              SchedWriteFCmpSizes, 0>,
5901            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5902            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5903defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5904                              SchedWriteFCmpSizes, 0>,
5905            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5906            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5907let isCodeGenOnly = 1 in {
5908  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5909                                 SchedWriteFCmpSizes, 1>,
5910               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5911                                 SchedWriteFCmpSizes, 1>;
5912  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5913                                 SchedWriteFCmpSizes, 1>,
5914               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5915                                 SchedWriteFCmpSizes, 1>;
5916}
// Packed FP-domain logic ops (VAND 0x54, VANDN 0x55, VOR 0x56, VXOR 0x57),
// guarded by HasDQI. Both pattern operators are null_frag, so no selection
// pattern is attached through this multiclass. VANDN is the only one
// instantiated with IsCommutable = 0.
// The enclosing `let` sets an empty Uses list and mayRaiseFPException = 0;
// NOTE(review): this is presumably meant to override the Uses = [MXCSR] /
// mayRaiseFPException values that avx512_fp_packed assigns -- confirm
// against TableGen's let-override ordering.
5917let Uses = []<Register>, mayRaiseFPException = 0 in {
5918defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5919                               SchedWriteFLogicSizes, 1>;
5920defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5921                               SchedWriteFLogicSizes, 0>;
5922defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5923                               SchedWriteFLogicSizes, 1>;
5924defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5925                               SchedWriteFLogicSizes, 1>;
5926}
5927
// Packed form of a two-source FP op for one vector type `_`, built on
// AVX512_maskable (a single pattern, no commutability flags passed):
//   rr  - register/register.
//   rm  - $src2 loaded as a full vector via _.LdFrag.
//   rmb - $src2 loaded from a scalar memory operand via _.BroadcastLdFrag;
//         tagged EVEX_B.
// All forms are EVEX_4V, use MXCSR and are marked mayRaiseFPException = 1.
// Unlike avx512_fp_packed, mayLoad is not set explicitly on the memory forms.
5928multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5929                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5930  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5931  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5932                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5933                  "$src2, $src1", "$src1, $src2",
5934                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5935                  EVEX_4V, Sched<[sched]>;
5936  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5937                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5938                  "$src2, $src1", "$src1, $src2",
5939                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5940                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5941  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5942                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5943                   "${src2}"#_.BroadcastStr#", $src1",
5944                   "$src1, ${src2}"#_.BroadcastStr,
5945                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5946                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5947  }
5948}
5949
// Scalar form of a two-source FP op, built on AVX512_maskable_scalar:
//   rr - both sources in _.RC registers.
//   rm - $src2 is an _.IntScalarMemOp matched through _.ScalarIntMemFrags.
// Both use MXCSR and are marked mayRaiseFPException = 1. No EVEX_4V tag is
// applied inside this multiclass; the instantiation site must supply it.
5950multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5951                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5952  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5953  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5954                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5955                  "$src2, $src1", "$src1, $src2",
5956                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5957                  Sched<[sched]>;
5958  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5959                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5960                  "$src2, $src1", "$src1, $src2",
5961                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5962                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5963  }
5964}
5965
// Generates every scalef variant for one mnemonic.
//   opc       - opcode of the packed forms.
//   opcScaler - opcode of the scalar forms.
//   sched     - X86SchedWriteWidths, indexed as sched.{Scl,XMM,YMM,ZMM}.
// Layout:
//   * 512-bit packed (PHZ/PSZ/PDZ): avx512_fp_scalef_p plus the
//     explicit-rounding form from avx512_fp_round_packed (X86scalefRnd).
//   * scalar (SHZ/SSZ/SDZ): avx512_fp_scalef_scalar plus
//     avx512_fp_scalar_round, which receives the mnemonic with the
//     "sh"/"ss"/"sd" suffix already appended.
//   * 128/256-bit packed: avx512_fp_scalef_p only, under HasVLX.
// f16 variants require HasFP16 and use T_MAP6PD; f32/f64 variants use T8PD.
// SSZ and SDZ carry VEX_LIG; SHZ as written here does not.
5966multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5967                                X86SchedWriteWidths sched> {
5968  let Predicates = [HasFP16] in {
5969    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5970               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5971                                EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
5972    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5973               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5974                             EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
5975  }
5976  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5977             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5978                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
5979  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5980             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5981                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5982  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5983             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5984                                    X86scalefsRnd, sched.Scl>,
5985                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
5986  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5987             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5988                                    X86scalefsRnd, sched.Scl>,
5989                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
5990
5991  // Define only if AVX512VL feature is present.
5992  let Predicates = [HasVLX] in {
5993    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5994                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
5995    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5996                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
5997    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5998                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5999    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
6000                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
6001  }
6002
6003  let Predicates = [HasFP16, HasVLX] in {
6004    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6005                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6006    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6007                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6008  }
6009}
// VSCALEF: packed forms use opcode 0x2C, scalar forms 0x2D. Scheduled with
// SchedWriteFAdd and tagged NotEVEX2VEXConvertible.
6010defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6011                                    SchedWriteFAdd>, NotEVEX2VEXConvertible;
6012
6013//===----------------------------------------------------------------------===//
6014// AVX-512  VPTESTM instructions
6015//===----------------------------------------------------------------------===//
6016
// Register and full-vector-memory forms of a vector test instruction whose
// result is written to a mask register (_.KRC:$dst). Both forms are built on
// AVX512_maskable_cmp with null_frag patterns and hasSideEffects = 0.
//   rr - register/register; the trailing 1 is passed positionally to
//        AVX512_maskable_cmp (presumably its commutable flag -- confirm
//        against that class, which is outside this view).
//   rm - $src2 from memory; mayLoad = 1 is set by hand since there is no
//        pattern to infer it from, and EVEX_CD8 is derived from _.EltSize.
6017multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6018                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6019  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6020  // There are just too many permutations due to commutability and bitcasts.
6021  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6022  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6023                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6024                      "$src2, $src1", "$src1, $src2",
6025                   (null_frag), (null_frag), 1>,
6026                   EVEX_4V, Sched<[sched]>;
6027  let mayLoad = 1 in
6028  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6029                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6030                       "$src2, $src1", "$src1, $src2",
6031                   (null_frag), (null_frag)>,
6032                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6033                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6034  }
6035}
6036
// Broadcast-memory (rmb) form of a vector test instruction. $src2 is a
// scalar memory operand printed with _.BroadcastStr; the instruction is
// tagged EVEX_B and, like the other vptest forms, has null_frag patterns with
// mayLoad = 1 and hasSideEffects = 0 set explicitly.
6037multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6038                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6039  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6040  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6041                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6042                    "${src2}"#_.BroadcastStr#", $src1",
6043                    "$src1, ${src2}"#_.BroadcastStr,
6044                    (null_frag), (null_frag)>,
6045                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6046                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6047}
6048
// Instantiates the rr/rm (avx512_vptest) and rmb (avx512_vptest_mb) forms
// for all three vector widths of one element type:
//   Z         - 512-bit (_.info512), guarded by [HasAVX512].
//   Z256/Z128 - 256/128-bit (_.info256 / _.info128), guarded by
//               [HasAVX512, HasVLX].
6049multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6050                                  X86SchedWriteWidths sched,
6051                                  AVX512VLVectorVTInfo _> {
6052  let Predicates  = [HasAVX512] in
6053  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
6054           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6055
6056  let Predicates = [HasAVX512, HasVLX] in {
6057  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
6058              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6059  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
6060              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6061  }
6062}
6063
// Dword (D, "d" suffix, i32 elements) and qword (Q, "q" suffix, i64 elements,
// VEX_W) variants of a vector test instruction, sharing one opcode.
6064multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6065                            X86SchedWriteWidths sched> {
6066  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6067                                 avx512vl_i32_info>;
6068  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6069                                 avx512vl_i64_info>, VEX_W;
6070}
6071
// Word ("w" suffix, VEX_W) and byte ("b" suffix) variants of a vector test
// instruction, sharing one opcode. Only avx512_vptest (rr/rm) is
// instantiated -- no broadcast form is defined for these element sizes.
// 512-bit variants require HasBWI; 128/256-bit variants require
// [HasVLX, HasBWI].
6072multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6073                            X86SchedWriteWidths sched> {
6074  let Predicates = [HasBWI] in {
6075  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6076                            v32i16_info>, EVEX_V512, VEX_W;
6077  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6078                            v64i8_info>, EVEX_V512;
6079  }
6080
6081  let Predicates = [HasVLX, HasBWI] in {
6082  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6083                            v16i16x_info>, EVEX_V256, VEX_W;
6084  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6085                            v8i16x_info>, EVEX_V128, VEX_W;
6086  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6087                            v32i8x_info>, EVEX_V256;
6088  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6089                            v16i8x_info>, EVEX_V128;
6090  }
6091}
6092
// Convenience multiclass that inherits both the byte/word forms (opcode
// opc_wb) and the dword/qword forms (opcode opc_dq) of a vector test
// instruction. It adds no records of its own.
6093multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6094                                   X86SchedWriteWidths sched> :
6095  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6096  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6097
// VPTESTM and VPTESTNM share the same opcode bytes (0x26 for byte/word,
// 0x27 for dword/qword) and scheduling group; in these definitions they
// differ only in the prefix class: T8PD for VPTESTM, T8XS for VPTESTNM.
6098defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6099                                         SchedWriteVecLogic>, T8PD;
6100defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6101                                         SchedWriteVecLogic>, T8XS;
6102
6103//===----------------------------------------------------------------------===//
6104// AVX-512  Shift instructions
6105//===----------------------------------------------------------------------===//
6106
// Shift/rotate by an 8-bit immediate for one vector type `_`.
//   ri - source vector in a register, encoded with format ImmFormR.
//   mi - source vector loaded from memory via _.LdFrag, encoded with
//        format ImmFormM. Scheduled with sched.Folded only; the sole
//        non-immediate input is the memory operand.
// In both forms $src2 is a u8imm matched as an i8 target immediate.
6107multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6108                            string OpcodeStr, SDNode OpNode,
6109                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6110  let ExeDomain = _.ExeDomain in {
6111  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6112                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6113                      "$src2, $src1", "$src1, $src2",
6114                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6115                   Sched<[sched]>;
6116  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6117                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6118                       "$src2, $src1", "$src1, $src2",
6119                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6120                          (i8 timm:$src2)))>,
6121                   Sched<[sched.Folded]>;
6122  }
6123}
6124
// Broadcast-memory variant of shift/rotate by immediate (mbi). $src1 is a
// scalar memory operand loaded through _.BroadcastLdFrag and printed with
// _.BroadcastStr; $src2 is the 8-bit immediate count. Tagged EVEX_B.
6125multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6126                             string OpcodeStr, SDNode OpNode,
6127                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6128  let ExeDomain = _.ExeDomain in
6129  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6130                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6131      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6132     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6133     EVEX_B, Sched<[sched.Folded]>;
6134}
6135
// Shift by a count held in a vector operand, for one destination vector
// type `_`.
//   rr - count in a VR128X register.
//   rm - count loaded from an i128mem operand.
// SrcVT is the value type given to the count operand in the pattern; the
// count is always 128 bits wide regardless of the width of `_`.
6136multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6137                            X86FoldableSchedWrite sched, ValueType SrcVT,
6138                            X86VectorVTInfo _> {
6139   // src2 is always 128-bit
6140  let ExeDomain = _.ExeDomain in {
6141  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6142                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6143                      "$src2, $src1", "$src1, $src2",
6144                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6145                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
6146  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6147                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6148                       "$src2, $src1", "$src1, $src2",
6149                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6150                   AVX512BIBase,
6151                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6152  }
6153}
6154
// Instantiates avx512_shift_rrm for the 512-, 256- and 128-bit widths of one
// element type. Z is guarded by [prd]; Z256/Z128 by [prd, HasVLX].
// The EVEX_CD8 tuple form differs per width (CD8VQ for 512, CD8VH for 256,
// CD8VF for 128) while the memory operand in avx512_shift_rrm is i128mem in
// every case.
6155multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6156                              X86SchedWriteWidths sched, ValueType SrcVT,
6157                              AVX512VLVectorVTInfo VTInfo,
6158                              Predicate prd> {
6159  let Predicates = [prd] in
6160  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6161                               VTInfo.info512>, EVEX_V512,
6162                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6163  let Predicates = [prd, HasVLX] in {
6164  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6165                               VTInfo.info256>, EVEX_V256,
6166                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6167  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6168                               VTInfo.info128>, EVEX_V128,
6169                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6170  }
6171}
6172
// Dword, qword and word variants of a shift whose count is a vector operand.
//   D - opcode opcd, "d" suffix, count typed v4i32, requires HasAVX512.
//   Q - opcode opcq, "q" suffix, count typed v2i64, requires HasAVX512, VEX_W.
//   W - opcode opcw, "w" suffix, count typed v8i16, requires HasBWI.
// NotEVEX2VEXConvertibleQ is assigned to notEVEX2VEXConvertible for the Q
// variant only (the `let` has no braces, so it scopes over one defm).
6173multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6174                              string OpcodeStr, SDNode OpNode,
6175                              X86SchedWriteWidths sched,
6176                              bit NotEVEX2VEXConvertibleQ = 0> {
6177  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6178                              avx512vl_i32_info, HasAVX512>;
6179  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6180  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6181                              avx512vl_i64_info, HasAVX512>, VEX_W;
6182  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6183                              avx512vl_i16_info, HasBWI>;
6184}
6185
// Shift/rotate-by-immediate for all three widths of one element type. Each
// width combines the ri/mi forms (avx512_shift_rmi) with the broadcast mbi
// form (avx512_shift_rmbi). Z requires HasAVX512; Z256/Z128 additionally
// require HasVLX.
6186multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6187                                  string OpcodeStr, SDNode OpNode,
6188                                  X86SchedWriteWidths sched,
6189                                  AVX512VLVectorVTInfo VTInfo> {
6190  let Predicates = [HasAVX512] in
6191  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6192                              sched.ZMM, VTInfo.info512>,
6193             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6194                               VTInfo.info512>, EVEX_V512;
6195  let Predicates = [HasAVX512, HasVLX] in {
6196  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6197                              sched.YMM, VTInfo.info256>,
6198             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6199                               VTInfo.info256>, EVEX_V256;
6200  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6201                              sched.XMM, VTInfo.info128>,
6202             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6203                               VTInfo.info128>, EVEX_V128;
6204  }
6205}
6206
// Word-element shift-by-immediate for all three widths (v32i16, v16i16,
// v8i16). Only the ri/mi forms are instantiated -- there is no broadcast
// form. Requires HasBWI (plus HasVLX for 128/256-bit); every variant is
// VEX_WIG. OpcodeStr is used as-is, so callers pass the full mnemonic.
6207multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6208                              string OpcodeStr, SDNode OpNode,
6209                              X86SchedWriteWidths sched> {
6210  let Predicates = [HasBWI] in
6211  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6212                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
6213  let Predicates = [HasVLX, HasBWI] in {
6214  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6215                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
6216  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6217                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
6218  }
6219}
6220
// Dword (D, opcode opcd, "d" suffix) and qword (Q, opcode opcq, "q" suffix,
// VEX_W) shift/rotate-by-immediate variants, each expanded to all widths by
// avx512_shift_rmi_sizes. NotEVEX2VEXConvertibleQ is assigned to
// notEVEX2VEXConvertible for the Q variant only.
6221multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6222                               Format ImmFormR, Format ImmFormM,
6223                               string OpcodeStr, SDNode OpNode,
6224                               X86SchedWriteWidths sched,
6225                               bit NotEVEX2VEXConvertibleQ = 0> {
6226  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6227                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6228  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6229  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6230                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
6231}
6232
// Shifts and rotates by immediate. The operation is selected by the ModRM
// format pair: MRM2 = VPSRL, MRM6 = VPSLL, MRM4 = VPSRA, MRM0 = VPROR,
// MRM1 = VPROL.
//   * VPSRL / VPSLL use 0x72 for dword, 0x73 for qword and 0x71 for word.
//   * VPSRA, VPROR and VPROL pass 0x72 for both dword and qword; in those
//     cases the two differ only by the VEX_W bit added in
//     avx512_shift_rmi_dq.
//   * VPSRA passes NotEVEX2VEXConvertibleQ = 1 for its qword form.
//   * The rotates have no word-element variant.
6233defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6234                                 SchedWriteVecShiftImm>,
6235             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6236                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6237
6238defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6239                                 SchedWriteVecShiftImm>,
6240             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6241                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6242
6243defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6244                                 SchedWriteVecShiftImm, 1>,
6245             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6246                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6247
6248defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6249                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6250defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6251                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6252
// Instantiations of avx512_shift_types (defined earlier in this file, outside
// this section) for the X86vshl / X86vsra / X86vsrl nodes. The three opcode
// arguments differ per operation except for VPSRA, which passes 0xE2 twice.
// NOTE(review): the trailing `1` on VPSRA is presumably the same
// "Q form is not EVEX->VEX convertible" flag used by avx512_shift_rmi_dq -
// confirm against the avx512_shift_types parameter list.
6253defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6254                                SchedWriteVecShift>;
6255defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6256                                SchedWriteVecShift, 1>;
6257defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6258                                SchedWriteVecShift>;
6259
6260// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the narrow data operand into the low part of an
// otherwise undefined 512-bit register (INSERT_SUBREG of IMPLICIT_DEF), runs
// the Z-suffixed instruction, and extracts the matching low sub-register.
// Only $src1 is widened: the shift count stays a VR128X register (rr forms)
// or an immediate (ri forms).
6261let Predicates = [HasAVX512, NoVLX] in {
  // v4i64 arithmetic right shift by the count held in an XMM register.
6262  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6263            (EXTRACT_SUBREG (v8i64
6264              (VPSRAQZrr
6265                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6266                 VR128X:$src2)), sub_ymm)>;
6267
  // v2i64 arithmetic right shift by the count held in an XMM register.
6268  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6269            (EXTRACT_SUBREG (v8i64
6270              (VPSRAQZrr
6271                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6272                 VR128X:$src2)), sub_xmm)>;
6273
  // v4i64 arithmetic right shift by an i8 immediate.
6274  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6275            (EXTRACT_SUBREG (v8i64
6276              (VPSRAQZri
6277                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6278                 timm:$src2)), sub_ymm)>;
6279
  // v2i64 arithmetic right shift by an i8 immediate.
6280  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6281            (EXTRACT_SUBREG (v8i64
6282              (VPSRAQZri
6283                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6284                 timm:$src2)), sub_xmm)>;
6285}
6286
6287//===-------------------------------------------------------------------===//
6288// Variable Bit Shifts
6289//===-------------------------------------------------------------------===//
6290
// Two-source vector operation with a register (rr) and a full-vector memory
// (rm) form, both built through AVX512_maskable for vector type `_`.
//   rr: OpNode applied to $src1 and register $src2; scheduled with `sched`.
//   rm: OpNode applied to $src1 and a `_.LdFrag` load of $src2; scheduled
//       with sched.Folded / sched.ReadAfterFold and tagged
//       EVEX_CD8<_.EltSize, CD8VF>.
// Both forms share `opc`, AVX5128IBase and EVEX_4V and inherit
// ExeDomain from the vector type. Despite the name, this multiclass is also
// reused for permute and byte-shuffle instructions later in this file.
6291multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6292                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6293  let ExeDomain = _.ExeDomain in {
6294  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6295                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6296                      "$src2, $src1", "$src1, $src2",
6297                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6298                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
6299  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6300                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6301                       "$src2, $src1", "$src1, $src2",
6302                   (_.VT (OpNode _.RC:$src1,
6303                   (_.VT (_.LdFrag addr:$src2))))>,
6304                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6305                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6306  }
6307}
6308
// Broadcast-memory (rmb) companion to avx512_var_shift: the second operand is
// a scalar memory operand (_.ScalarMemOp) loaded through _.BroadcastLdFrag,
// the assembly operand gets _.BroadcastStr appended, and the encoding adds
// EVEX_B. Scheduling uses the folded-load variants of `sched`.
6309multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6310                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6311  let ExeDomain = _.ExeDomain in
6312  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6313                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6314                    "${src2}"#_.BroadcastStr#", $src1",
6315                    "$src1, ${src2}"#_.BroadcastStr,
6316                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6317                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6318                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6319}
6320
// Expands avx512_var_shift + avx512_var_shift_mb over the three vector
// widths of `_`:
//   Z    (512-bit) - requires HasAVX512, uses sched.ZMM.
//   Z256 (256-bit) - requires HasAVX512 and HasVLX, uses sched.YMM.
//   Z128 (128-bit) - requires HasAVX512 and HasVLX, uses sched.XMM.
6321multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6322                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6323  let Predicates  = [HasAVX512] in
6324  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6325           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6326
6327  let Predicates = [HasAVX512, HasVLX] in {
6328  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6329              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6330  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6331              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6332  }
6333}
6334
// Instantiates avx512_var_shift_sizes for 32-bit (D, mnemonic suffix "d") and
// 64-bit (Q, mnemonic suffix "q", VEX_W) integer elements. Both element sizes
// share the same opcode `opc`.
6335multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6336                                  SDNode OpNode, X86SchedWriteWidths sched> {
6337  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6338                                 avx512vl_i32_info>;
6339  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6340                                 avx512vl_i64_info>, VEX_W;
6341}
6342
6343// Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits two anonymous patterns, guarded by predicate list `p`, that select
// the 512-bit register-register instruction named OpcodeStr#"Zrr" for the
// 256-bit and 128-bit types of `_`. Here OpcodeStr is an instruction record
// name prefix (e.g. "VPSRAVQ"), not an assembly mnemonic.
// Both operands are widened into undefined 512-bit registers because the
// per-element counts in $src2 are a full vector; the result is taken from the
// low sub_ymm / sub_xmm.
6344multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6345                                     SDNode OpNode, list<Predicate> p> {
6346  let Predicates = p in {
6347  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6348                                  (_.info256.VT _.info256.RC:$src2))),
6349            (EXTRACT_SUBREG
6350                (!cast<Instruction>(OpcodeStr#"Zrr")
6351                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6352                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6353             sub_ymm)>;
6354
6355  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6356                                  (_.info128.VT _.info128.RC:$src2))),
6357            (EXTRACT_SUBREG
6358                (!cast<Instruction>(OpcodeStr#"Zrr")
6359                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6360                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6361             sub_xmm)>;
6362  }
6363}
// Word-element (i16) variable shifts. Only the rr/rm forms from
// avx512_var_shift are instantiated (no broadcast form), all with VEX_W:
//   WZ    - v32i16, requires HasBWI.
//   WZ256 - v16i16, requires HasVLX and HasBWI.
//   WZ128 - v8i16,  requires HasVLX and HasBWI.
6364multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6365                              SDNode OpNode, X86SchedWriteWidths sched> {
6366  let Predicates = [HasBWI] in
6367  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6368              EVEX_V512, VEX_W;
6369  let Predicates = [HasVLX, HasBWI] in {
6370
6371  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6372              EVEX_V256, VEX_W;
6373  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6374              EVEX_V128, VEX_W;
6375  }
6376}
6377
// Per-element variable shifts: D/Q forms from avx512_var_shift_types plus the
// word form from avx512_var_shift_w (which uses a different opcode).
6378defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6379              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6380
6381defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6382              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6383
6384defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6385              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6386
// Per-element variable rotates select directly on the generic rotr/rotl
// nodes; there is no word form.
6387defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6388defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6389
// NoVLX fallbacks: widen 128/256-bit operations to the 512-bit instruction.
// Only the 64-bit arithmetic shift and the three word shifts are lowered
// this way here.
6390defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6391defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6392defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6393defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6394
6395
6396// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Variable rotates (generic `rotl`) map to VPROLVQZrr / VPROLVDZrr with both
// operands widened; immediate rotates (X86vrotli) map to VPROLQZri /
// VPROLDZri with only the data operand widened. Widening inserts the source
// into an undefined 512-bit register and the result is read back from the
// same low sub-register.
6397let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-left, 64-bit elements.
6398  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6399            (EXTRACT_SUBREG (v8i64
6400              (VPROLVQZrr
6401                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6402                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6403                       sub_xmm)>;
6404  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6405            (EXTRACT_SUBREG (v8i64
6406              (VPROLVQZrr
6407                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6408                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6409                       sub_ymm)>;
6410
  // Variable rotate-left, 32-bit elements.
6411  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6412            (EXTRACT_SUBREG (v16i32
6413              (VPROLVDZrr
6414                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6415                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6416                        sub_xmm)>;
6417  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6418            (EXTRACT_SUBREG (v16i32
6419              (VPROLVDZrr
6420                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6421                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6422                        sub_ymm)>;
6423
  // Immediate rotate-left, 64-bit elements.
6424  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6425            (EXTRACT_SUBREG (v8i64
6426              (VPROLQZri
6427                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6428                        timm:$src2)), sub_xmm)>;
6429  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6430            (EXTRACT_SUBREG (v8i64
6431              (VPROLQZri
6432                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6433                       timm:$src2)), sub_ymm)>;
6434
  // Immediate rotate-left, 32-bit elements.
6435  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6436            (EXTRACT_SUBREG (v16i32
6437              (VPROLDZri
6438                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6439                        timm:$src2)), sub_xmm)>;
6440  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6441            (EXTRACT_SUBREG (v16i32
6442              (VPROLDZri
6443                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6444                        timm:$src2)), sub_ymm)>;
6445}
6446
6447// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the rotate-left patterns: generic `rotr` maps to VPRORVQZrr /
// VPRORVDZrr with both operands widened, and X86vrotri maps to VPRORQZri /
// VPRORDZri with only the data operand widened.
6448let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-right, 64-bit elements.
6449  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6450            (EXTRACT_SUBREG (v8i64
6451              (VPRORVQZrr
6452                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6453                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6454                       sub_xmm)>;
6455  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6456            (EXTRACT_SUBREG (v8i64
6457              (VPRORVQZrr
6458                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6459                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6460                       sub_ymm)>;
6461
  // Variable rotate-right, 32-bit elements.
6462  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6463            (EXTRACT_SUBREG (v16i32
6464              (VPRORVDZrr
6465                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6466                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6467                        sub_xmm)>;
6468  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6469            (EXTRACT_SUBREG (v16i32
6470              (VPRORVDZrr
6471                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6472                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6473                        sub_ymm)>;
6474
  // Immediate rotate-right, 64-bit elements.
6475  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6476            (EXTRACT_SUBREG (v8i64
6477              (VPRORQZri
6478                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6479                        timm:$src2)), sub_xmm)>;
6480  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6481            (EXTRACT_SUBREG (v8i64
6482              (VPRORQZri
6483                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6484                       timm:$src2)), sub_ymm)>;
6485
  // Immediate rotate-right, 32-bit elements.
6486  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6487            (EXTRACT_SUBREG (v16i32
6488              (VPRORDZri
6489                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6490                        timm:$src2)), sub_xmm)>;
6491  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6492            (EXTRACT_SUBREG (v16i32
6493              (VPRORDZri
6494                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6495                        timm:$src2)), sub_ymm)>;
6496}
6497
6498//===-------------------------------------------------------------------===//
6499// 1-src variable permutation VPERMW/D/Q
6500//===-------------------------------------------------------------------===//
6501
// Variable permute for dword/qword element types, built from the generic
// two-source multiclasses avx512_var_shift (rr/rm) and avx512_var_shift_mb
// (rmb). Only the 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 +
// HasVLX) widths are defined; no 128-bit form is instantiated. A single
// `sched` write is used for both widths.
6502multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6503                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6504  let Predicates  = [HasAVX512] in
6505  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6506           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6507
6508  let Predicates = [HasAVX512, HasVLX] in
6509  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6510              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6511}
6512
// Immediate-controlled permute, built from avx512_shift_rmi and
// avx512_shift_rmbi (both defined earlier in this file, outside this
// section). As with avx512_vperm_dq_sizes, only the 512-bit (Z) and 256-bit
// (Z256, additionally requires HasVLX) widths are defined, and one `sched`
// write serves both.
6513multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6514                                 string OpcodeStr, SDNode OpNode,
6515                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6516  let Predicates = [HasAVX512] in
6517  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6518                              sched, VTInfo.info512>,
6519             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6520                               sched, VTInfo.info512>, EVEX_V512;
6521  let Predicates = [HasAVX512, HasVLX] in
6522  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6523                              sched, VTInfo.info256>,
6524             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6525                               sched, VTInfo.info256>, EVEX_V256;
6526}
6527
// Variable permute for byte/word element types. `prd` is the feature
// predicate gating the instruction; the 256/128-bit forms additionally
// require HasVLX. Only rr/rm forms are produced (avx512_var_shift), i.e. no
// broadcast-memory form is instantiated.
6528multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6529                              Predicate prd, SDNode OpNode,
6530                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6531  let Predicates = [prd] in
6532  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6533              EVEX_V512 ;
6534  let Predicates = [HasVLX, prd] in {
6535  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6536              EVEX_V256 ;
6537  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6538              EVEX_V128 ;
6539  }
6540}
6541
// Byte/word variable permutes. Both use opcode 0x8D; VPERMW adds VEX_W and is
// gated on HasBWI, VPERMB is gated on HasVBMI.
6542defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6543                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6544defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6545                               WriteVarShuffle256, avx512vl_i8_info>;
6546
// Dword/qword variable permutes (vector index operand). Integer forms use
// opcode 0x36, floating-point forms 0x16; the 64-bit element forms add VEX_W.
6547defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6548                                    WriteVarShuffle256, avx512vl_i32_info>;
6549defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6550                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6551defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6552                                     WriteFVarShuffle256, avx512vl_f32_info>;
6553defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6554                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6555
// Immediate-controlled forms of VPERMQ / VPERMPD. These reuse the VPERMQ and
// VPERMPD defm prefixes.
// NOTE(review): this relies on avx512_shift_rmi/avx512_shift_rmbi producing
// record suffixes distinct from rr/rm/rmb - confirm against their definitions.
6556defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6557                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6558                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6559defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6560                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6561                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6562
6563//===----------------------------------------------------------------------===//
6564// AVX-512 - VPERMIL
6565//===----------------------------------------------------------------------===//
6566
// Variable-control VPERMIL forms. `_` describes the data/result vector type
// and `Ctrl` the control-vector type.
//   rr  - control in a Ctrl.RC register.
//   rm  - control loaded as a full vector via Ctrl.LdFrag.
//   rmb - control broadcast from a scalar via Ctrl.BroadcastLdFrag (EVEX_B).
// The rmb form takes its memory operand class and broadcast suffix from `_`
// (_.ScalarMemOp, _.BroadcastStr) while the load fragment comes from `Ctrl`.
// NOTE(review): presumably safe because data and control types have the same
// element width in every instantiation - confirm if a new user is added.
6567multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6568                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6569                             X86VectorVTInfo Ctrl> {
6570  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6571                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6572                  "$src2, $src1", "$src1, $src2",
6573                  (_.VT (OpNode _.RC:$src1,
6574                               (Ctrl.VT Ctrl.RC:$src2)))>,
6575                  T8PD, EVEX_4V, Sched<[sched]>;
6576  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6577                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6578                  "$src2, $src1", "$src1, $src2",
6579                  (_.VT (OpNode
6580                           _.RC:$src1,
6581                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6582                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6583                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6584  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6585                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6586                   "${src2}"#_.BroadcastStr#", $src1",
6587                   "$src1, ${src2}"#_.BroadcastStr,
6588                   (_.VT (OpNode
6589                            _.RC:$src1,
6590                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6591                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6592                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6593}
6594
// Expands avx512_permil_vec (always with the X86VPermilpv node) over the
// three vector widths: Z under HasAVX512, Z128 and Z256 under HasAVX512 +
// HasVLX. The data (`_`) and control (`Ctrl`) type families are indexed by
// the same width, and the matching sched.ZMM/XMM/YMM write is used.
6595multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6596                                    X86SchedWriteWidths sched,
6597                                    AVX512VLVectorVTInfo _,
6598                                    AVX512VLVectorVTInfo Ctrl> {
6599  let Predicates = [HasAVX512] in {
6600    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6601                                  _.info512, Ctrl.info512>, EVEX_V512;
6602  }
6603  let Predicates = [HasAVX512, HasVLX] in {
6604    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6605                                  _.info128, Ctrl.info128>, EVEX_V128;
6606    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6607                                  _.info256, Ctrl.info256>, EVEX_V256;
6608  }
6609}
6610
// Complete VPERMIL family for one element type: the variable-control forms
// (opcode OpcVar, via avx512_permil_vec_common) and the immediate-control
// forms (opcode OpcImm, via avx512_shift_rmi_sizes with X86VPermilpi). Both
// are emitted under the instantiating defm's own NAME.
6611multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6612                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6613  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6614                                      _, Ctrl>;
6615  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6616                                    X86VPermilpi, SchedWriteFShuffle, _>,
6617                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6618}
6619
// VPERMILPS / VPERMILPD: float data permuted under an integer control vector
// of the same element width (f32 with i32, f64 with i64). The execution
// domain is overridden per instantiation, and the PD form adds VEX_W1X.
6620let ExeDomain = SSEPackedSingle in
6621defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6622                               avx512vl_i32_info>;
6623let ExeDomain = SSEPackedDouble in
6624defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6625                               avx512vl_i64_info>, VEX_W1X;
6626
6627//===----------------------------------------------------------------------===//
6628// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6629//===----------------------------------------------------------------------===//
6630
// Immediate shuffles reuse the shift-by-immediate multiclasses. All three
// share opcode 0x70 and differ in the prefix base class (AVX512BIi8Base,
// AVX512XSIi8Base, AVX512XDIi8Base).
// NOTE(review): the word forms are named VPSHUFH / VPSHUFL here; the trailing
// "W" of the record names presumably comes from avx512_shift_rmi_w's own
// sub-defm names - confirm against that multiclass.
6631defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6632                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6633                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6634defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6635                                  X86PShufhw, SchedWriteShuffle>,
6636                                  EVEX, AVX512XSIi8Base;
6637defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6638                                  X86PShuflw, SchedWriteShuffle>,
6639                                  EVEX, AVX512XDIi8Base;
6640
6641//===----------------------------------------------------------------------===//
6642// AVX-512 - VPSHUFB
6643//===----------------------------------------------------------------------===//
6644
// Byte shuffle over the three vector widths, built from avx512_var_shift
// (rr/rm forms only). Z (v64i8) requires HasBWI; Z256 (v32i8) and Z128
// (v16i8) require HasVLX and HasBWI.
6645multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6646                               X86SchedWriteWidths sched> {
6647  let Predicates = [HasBWI] in
6648  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6649                              EVEX_V512;
6650
6651  let Predicates = [HasVLX, HasBWI] in {
6652  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6653                              EVEX_V256;
6654  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6655                              EVEX_V128;
6656  }
6657}
6658
// VPSHUFB: opcode 0x00, selected from X86pshufb, marked VEX_WIG.
6659defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6660                                  SchedWriteVarShuffle>, VEX_WIG;
6661
6662//===----------------------------------------------------------------------===//
6663// Move Low to High and High to Low packed FP Instructions
6664//===----------------------------------------------------------------------===//
6665
// EVEX-encoded register-register VMOVLHPS (opcode 0x16), selected from
// X86Movlhps on v4f32.
6666def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6667          (ins VR128X:$src1, VR128X:$src2),
6668          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6669          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6670          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// EVEX-encoded register-register VMOVHLPS (opcode 0x12), selected from
// X86Movhlps on v4f32. The `let` below applies to this def only: it is marked
// commutable, and it is additionally tagged NotMemoryFoldable.
6671let isCommutable = 1 in
6672def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6673          (ins VR128X:$src1, VR128X:$src2),
6674          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6675          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6676          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6677
6678//===----------------------------------------------------------------------===//
6679// VMOVHPS/PD VMOVLPS Instructions
6680// All patterns were taken from the SSE implementation.
6681//===----------------------------------------------------------------------===//
6682
// Load form (rm) of the 64-bit half-register moves. The instruction takes a
// register $src1 and an f64mem operand $src2. The selection pattern applies
// OpNode to $src1 and the loaded f64 promoted to v2f64 (scalar_to_vector) and
// bitconverted to _.VT. OpNode is an SDPatternOperator, so callers may pass
// null_frag to define the instruction without a pattern.
6683multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6684                                  SDPatternOperator OpNode,
6685                                  X86VectorVTInfo _> {
6686  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6687  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6688                  (ins _.RC:$src1, f64mem:$src2),
6689                  !strconcat(OpcodeStr,
6690                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6691                  [(set _.RC:$dst,
6692                     (OpNode _.RC:$src1,
6693                       (_.VT (bitconvert
6694                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6695                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6696}
6697
6698// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6699// SSE1. And MOVLPS pattern is even more complex.
// Consequently the PS forms pass null_frag, while the PD forms select from
// X86Unpckl (VMOVHPD) and X86Movsd (VMOVLPD). High-half moves use opcode 0x16
// and low-half moves 0x12; the PD forms add VEX_W.
6700defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6701                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6702defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6703                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6704defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6705                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6706defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6707                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6708
// Extra load-folding patterns: when the second operand of X86Unpckl /
// X86Movsd is an X86vzload64 of an address, select the memory form of
// VMOVHPD / VMOVLPD directly.
6709let Predicates = [HasAVX512] in {
6710  // VMOVHPD patterns
6711  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6712            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6713
6714  // VMOVLPD patterns
6715  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6716            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6717}
6718
// Store forms (MRMDestMem) of the 64-bit half-register moves, all scheduled
// as WriteFStore. High-half stores use opcode 0x17, low-half stores 0x13.
// The PS forms have empty pattern lists, so each is preceded by a brace-less
// `let mayStore = 1, hasSideEffects = 0 in` that applies to that single def.
// The PD forms carry `store` patterns and have no such `let`.
6719let SchedRW = [WriteFStore] in {
6720let mayStore = 1, hasSideEffects = 0 in
6721def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6722                       (ins f64mem:$dst, VR128X:$src),
6723                       "vmovhps\t{$src, $dst|$dst, $src}",
6724                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Stores element 0 of (X86Unpckh $src, $src), i.e. the pattern matches a
// store of the value produced by unpacking $src with itself.
6725def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6726                       (ins f64mem:$dst, VR128X:$src),
6727                       "vmovhpd\t{$src, $dst|$dst, $src}",
6728                       [(store (f64 (extractelt
6729                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6730                                     (iPTR 0))), addr:$dst)]>,
6731                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6732let mayStore = 1, hasSideEffects = 0 in
6733def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6734                       (ins f64mem:$dst, VR128X:$src),
6735                       "vmovlps\t{$src, $dst|$dst, $src}",
6736                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Stores element 0 of $src viewed as v2f64.
6737def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6738                       (ins f64mem:$dst, VR128X:$src),
6739                       "vmovlpd\t{$src, $dst|$dst, $src}",
6740                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6741                                     (iPTR 0))), addr:$dst)]>,
6742                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6743} // SchedRW
6744
// Additional VMOVHPD store selection: a store of element 0 of
// (X86VPermilpi $src, 1) is matched to VMOVHPDZ128mr on the unpermuted $src.
6745let Predicates = [HasAVX512] in {
6746  // VMOVHPD patterns
6747  def : Pat<(store (f64 (extractelt
6748                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6749                           (iPTR 0))), addr:$dst),
6750           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6751}
6752//===----------------------------------------------------------------------===//
6753// FMA - Fused Multiply Operations
6754//
6755
// Packed FMA, "213" operand form, for one vector type `_`.
// $src1 is tied to $dst (Constraints) and is therefore not listed in `ins`.
// Every pattern passes the operands to the node as ($src2, $src1, $src3).
//   r  - $src3 in a register.
//   m  - $src3 loaded as a full vector via _.LdFrag.
//   mb - $src3 broadcast from a scalar via _.BroadcastLdFrag (EVEX_B).
// OpNode is used for the first pattern argument and MaskOpNode for the
// second one handed to AVX512_maskable_fma.
// All forms read MXCSR and are marked mayRaiseFPException.
// NOTE(review): the trailing `1, 1` / `1, 0` arguments are presumably the
// IsCommutable / IsKCommutable flags of AVX512_maskable_fma - confirm against
// its definition, which is outside this section.
6756multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6757                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6758                               X86VectorVTInfo _> {
6759  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6760      Uses = [MXCSR], mayRaiseFPException = 1 in {
6761  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6762          (ins _.RC:$src2, _.RC:$src3),
6763          OpcodeStr, "$src3, $src2", "$src2, $src3",
6764          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6765          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6766          EVEX_4V, Sched<[sched]>;
6767
6768  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6769          (ins _.RC:$src2, _.MemOp:$src3),
6770          OpcodeStr, "$src3, $src2", "$src2, $src3",
6771          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6772          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6773          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6774
6775  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6776            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6777            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6778            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6779            (OpNode _.RC:$src2,
6780             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6781            (MaskOpNode _.RC:$src2,
6782             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6783            EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6784  }
6785}
6786
// Register form of the 213 FMA with an explicit rounding-control operand
// (rb). The extra AVX512RC:$rc input is passed to OpNode as an i32 immediate
// and the encoding adds EVEX_B and EVEX_RC. The same OpNode is used for both
// pattern arguments of AVX512_maskable_fma.
// Unlike avx512_fma3p_213_rm, the `let` here sets Uses = [MXCSR] but does not
// set mayRaiseFPException.
6787multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6788                                 X86FoldableSchedWrite sched,
6789                                 X86VectorVTInfo _> {
6790  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6791      Uses = [MXCSR] in
6792  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6793          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6794          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6795          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6796          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6797          EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6798}
6799
// Expands the 213 FMA over the three vector widths of `_`, gated on `prd`
// (default HasAVX512).
//   Z    - r/m/mb forms plus the rounding-control rb form (OpNodeRnd).
//   Z256 - r/m/mb forms only; additionally requires HasVLX.
//   Z128 - r/m/mb forms only; additionally requires HasVLX.
// Only the 512-bit width receives avx512_fma3_213_round. EVEX_CD8 is derived
// from each width's element size.
6800multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6801                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6802                                   X86SchedWriteWidths sched,
6803                                   AVX512VLVectorVTInfo _,
6804                                   Predicate prd = HasAVX512> {
6805  let Predicates = [prd] in {
6806    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6807                                      sched.ZMM, _.info512>,
6808                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6809                                        _.info512>,
6810                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6811  }
6812  let Predicates = [HasVLX, prd] in {
6813    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6814                                    sched.YMM, _.info256>,
6815                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6816    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6817                                    sched.XMM, _.info128>,
6818                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6819  }
6820}
6821
// Instantiates the 213 FMA for the three floating-point element types, all
// scheduled as SchedWriteFMA and sharing opcode `opc`:
//   PH - f16, mnemonic suffix "ph", gated on HasFP16, T_MAP6PD.
//   PS - f32, mnemonic suffix "ps", default predicate, T8PD.
//   PD - f64, mnemonic suffix "pd", default predicate, T8PD + VEX_W.
6822multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6823                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6824    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6825                                      OpNodeRnd, SchedWriteFMA,
6826                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6827    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6828                                      OpNodeRnd, SchedWriteFMA,
6829                                      avx512vl_f32_info>, T8PD;
6830    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6831                                      OpNodeRnd, SchedWriteFMA,
6832                                      avx512vl_f64_info>, T8PD, VEX_W;
6833}
6834
// 213-form packed FMA family. Arguments after the mnemonic stem are, in
// order: the node for the unmasked pattern, the node for the masked pattern,
// and the rounding-control node. VFMADDSUB213 and VFMSUBADD213 pass the same
// node for the first two; the others pass an `any_`-prefixed node first.
6835defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6836                                       fma, X86FmaddRnd>;
6837defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6838                                       X86Fmsub, X86FmsubRnd>;
6839defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6840                                       X86Fmaddsub, X86FmaddsubRnd>;
6841defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6842                                       X86Fmsubadd, X86FmsubaddRnd>;
6843defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6844                                       X86Fnmadd, X86FnmaddRnd>;
6845defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6846                                       X86Fnmsub, X86FnmsubRnd>;
6847
6848
// Packed FMA, 231 operand form, for a single vector type `_`.
// Defines three variants, all with $src1 tied to $dst:
//   r  - register $src3
//   m  - full-vector load of $src3 (_.LdFrag)
//   mb - broadcast load of $src3 (_.BroadcastLdFrag), marked EVEX_B
// Every node pattern uses the operand order ($src2, $src3, $src1).
// The r variant passes null_frag where m/mb pass an OpNode pattern, so only
// its MaskOpNode pattern is supplied here.
// NOTE(review): the meaning of the trailing "1, 1" / "1, 0" arguments is
// defined by AVX512_maskable_fma, which is outside this excerpt.
6849multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6850                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6851                               X86VectorVTInfo _> {
6852  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6853      Uses = [MXCSR], mayRaiseFPException = 1 in {
6854  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6855          (ins _.RC:$src2, _.RC:$src3),
6856          OpcodeStr, "$src3, $src2", "$src2, $src3",
6857          (null_frag),
6858          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6859          EVEX_4V, Sched<[sched]>;
6860
6861  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6862          (ins _.RC:$src2, _.MemOp:$src3),
6863          OpcodeStr, "$src3, $src2", "$src2, $src3",
6864          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6865          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6866          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6867
6868  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6869         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6870         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6871         "$src2, ${src3}"#_.BroadcastStr,
6872         (_.VT (OpNode _.RC:$src2,
6873                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6874                      _.RC:$src1)),
6875         (_.VT (MaskOpNode _.RC:$src2,
6876                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6877                           _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6878         Sched<[sched.Folded, sched.ReadAfterFold]>;
6879  }
6880}
6881
// Rounding-control register variant (rb) of the packed 231-form FMA.
// Adds an AVX512RC:$rc input that is passed to OpNode as (i32 timm:$rc), and
// marks the instruction EVEX_B and EVEX_RC. $src1 is tied to $dst and the
// node operand order is ($src2, $src3, $src1, rc). As with the plain register
// form, null_frag is passed in place of an unmasked pattern.
6882multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6883                                 X86FoldableSchedWrite sched,
6884                                 X86VectorVTInfo _> {
6885  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6886      Uses = [MXCSR] in
6887  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6888          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6889          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6890          (null_frag),
6891          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6892          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6893}
6894
// Instantiates the packed 231-form FMA at all three vector widths of `_`:
//   Z    - info512, under [prd]; also mixes in the rounding-control variant.
//   Z256 - info256, under [HasVLX, prd]; no rounding-control variant.
//   Z128 - info128, under [HasVLX, prd]; no rounding-control variant.
// Each width takes its scheduling entry from the matching field of `sched`
// and an EVEX_CD8 parameterized on that width's EltSize.
6895multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6896                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6897                                   X86SchedWriteWidths sched,
6898                                   AVX512VLVectorVTInfo _,
6899                                   Predicate prd = HasAVX512> {
6900  let Predicates = [prd] in {
6901    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6902                                      sched.ZMM, _.info512>,
6903                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6904                                        _.info512>,
6905                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6906  }
6907  let Predicates = [HasVLX, prd] in {
6908    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6909                                    sched.YMM, _.info256>,
6910                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6911    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6912                                    sched.XMM, _.info128>,
6913                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6914  }
6915}
6916
// Packed FMA, 231 operand form: one instantiation per FP element type.
//   PH - avx512vl_f16_info, predicate HasFP16, tagged T_MAP6PD.
//   PS - avx512vl_f32_info, tagged T8PD.
//   PD - avx512vl_f64_info, tagged T8PD and VEX_W.
// The "ph"/"ps"/"pd" suffix is appended to OpcodeStr, and all three use the
// SchedWriteFMA scheduling information.
6917multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6918                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6919    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6920                                      OpNodeRnd, SchedWriteFMA,
6921                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6922    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6923                                      OpNodeRnd, SchedWriteFMA,
6924                                      avx512vl_f32_info>, T8PD;
6925    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6926                                      OpNodeRnd, SchedWriteFMA,
6927                                      avx512vl_f64_info>, T8PD, VEX_W;
6928}
6929
// Packed 231-form FMA instruction families. Template arguments, in order:
// opcode byte, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd
// (see avx512_fma3p_231_f).
6930defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6931                                       fma, X86FmaddRnd>;
6932defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6933                                       X86Fmsub, X86FmsubRnd>;
6934defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6935                                       X86Fmaddsub, X86FmaddsubRnd>;
6936defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6937                                       X86Fmsubadd, X86FmsubaddRnd>;
6938defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6939                                       X86Fnmadd, X86FnmaddRnd>;
6940defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6941                                       X86Fnmsub, X86FnmsubRnd>;
6942
// Packed FMA, 132 operand form, for a single vector type `_`.
// Defines three variants, all with $src1 tied to $dst:
//   r  - register $src3; node operand order ($src1, $src3, $src2)
//   m  - full-vector load of $src3; node operand order (load, $src1, $src2)
//   mb - broadcast load of $src3, marked EVEX_B; same order as m
// The r variant passes null_frag where m/mb pass an OpNode pattern, so only
// its MaskOpNode pattern is supplied here.
// NOTE(review): the meaning of the trailing "1, 1" / "1, 0" arguments is
// defined by AVX512_maskable_fma, which is outside this excerpt.
6943multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6944                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6945                               X86VectorVTInfo _> {
6946  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6947      Uses = [MXCSR], mayRaiseFPException = 1 in {
6948  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6949          (ins _.RC:$src2, _.RC:$src3),
6950          OpcodeStr, "$src3, $src2", "$src2, $src3",
6951          (null_frag),
6952          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6953          EVEX_4V, Sched<[sched]>;
6954
6955  // The pattern is written in 312 order so that the load sits in a different
6956  // position than in the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6957  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6958          (ins _.RC:$src2, _.MemOp:$src3),
6959          OpcodeStr, "$src3, $src2", "$src2, $src3",
6960          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6961          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6962          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6963
6964  // The pattern is written in 312 order so that the load sits in a different
6965  // position than in the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6966  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6967         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6968         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6969         "$src2, ${src3}"#_.BroadcastStr,
6970         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6971                       _.RC:$src1, _.RC:$src2)),
6972         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6973                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6974         EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6975  }
6976}
6977
// Rounding-control register variant (rb) of the packed 132-form FMA.
// Adds an AVX512RC:$rc input that is passed to OpNode as (i32 timm:$rc), and
// marks the instruction EVEX_B and EVEX_RC. $src1 is tied to $dst and the
// node operand order is ($src1, $src3, $src2, rc). As with the plain register
// form, null_frag is passed in place of an unmasked pattern.
6978multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6979                                 X86FoldableSchedWrite sched,
6980                                 X86VectorVTInfo _> {
6981  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6982      Uses = [MXCSR] in
6983  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6984          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6985          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6986          (null_frag),
6987          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6988          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6989}
6990
// Instantiates the packed 132-form FMA at all three vector widths of `_`:
//   Z    - info512, under [prd]; also mixes in the rounding-control variant.
//   Z256 - info256, under [HasVLX, prd]; no rounding-control variant.
//   Z128 - info128, under [HasVLX, prd]; no rounding-control variant.
// Each width takes its scheduling entry from the matching field of `sched`
// and an EVEX_CD8 parameterized on that width's EltSize.
6991multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6992                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6993                                   X86SchedWriteWidths sched,
6994                                   AVX512VLVectorVTInfo _,
6995                                   Predicate prd = HasAVX512> {
6996  let Predicates = [prd] in {
6997    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6998                                      sched.ZMM, _.info512>,
6999                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7000                                        _.info512>,
7001                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7002  }
7003  let Predicates = [HasVLX, prd] in {
7004    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7005                                    sched.YMM, _.info256>,
7006                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7007    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7008                                    sched.XMM, _.info128>,
7009                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7010  }
7011}
7012
// Packed FMA, 132 operand form: one instantiation per FP element type.
//   PH - avx512vl_f16_info, predicate HasFP16, tagged T_MAP6PD.
//   PS - avx512vl_f32_info, tagged T8PD.
//   PD - avx512vl_f64_info, tagged T8PD and VEX_W.
// The "ph"/"ps"/"pd" suffix is appended to OpcodeStr, and all three use the
// SchedWriteFMA scheduling information.
7013multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7014                              SDNode MaskOpNode, SDNode OpNodeRnd > {
7015    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
7016                                      OpNodeRnd, SchedWriteFMA,
7017                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
7018    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7019                                      OpNodeRnd, SchedWriteFMA,
7020                                      avx512vl_f32_info>, T8PD;
7021    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7022                                      OpNodeRnd, SchedWriteFMA,
7023                                      avx512vl_f64_info>, T8PD, VEX_W;
7024}
7025
// Packed 132-form FMA instruction families. Template arguments, in order:
// opcode byte, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd
// (see avx512_fma3p_132_f).
7026defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7027                                       fma, X86FmaddRnd>;
7028defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7029                                       X86Fmsub, X86FmsubRnd>;
7030defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7031                                       X86Fmaddsub, X86FmaddsubRnd>;
7032defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7033                                       X86Fmsubadd, X86FmsubaddRnd>;
7034defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7035                                       X86Fnmadd, X86FnmaddRnd>;
7036defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7037                                       X86Fnmsub, X86FnmsubRnd>;
7038
7039// Scalar FMA
// Scalar FMA building block for one opcode / operand form.
// Defines, all with $src1 tied to $dst and hasSideEffects = 0:
//   r_Int, m_Int, rb_Int - _.RC forms built via AVX512_maskable_3src_scalar
//                          with null_frag (no pattern is attached here).
//   r, m, rb             - isCodeGenOnly, isCommutable _.FRC forms whose
//                          selection patterns are RHS_r, RHS_m and RHS_b.
// When MaskOnlyReg is set, r and rb are emitted with an empty pattern list;
// m always carries RHS_m.
// The rb / rb_Int forms take an AVX512RC:$rc operand and are marked EVEX_B,
// EVEX_RC with Uses = [MXCSR]; the remaining forms carry SIMD_EXC instead.
7040multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7041                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7042let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7043  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7044          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7045          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7046          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7047
7048  let mayLoad = 1 in
7049  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7050          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7051          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7052          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7053
7054  let Uses = [MXCSR] in
7055  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7056         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7057         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7058         EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7059
7060  let isCodeGenOnly = 1, isCommutable = 1 in {
7061    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7062                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7063                     !strconcat(OpcodeStr,
7064                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7065                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
7066    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
7067                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7068                    !strconcat(OpcodeStr,
7069                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7070                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7071
7072    let Uses = [MXCSR] in
7073    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7074                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7075                     !strconcat(OpcodeStr,
7076                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7077                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7078                     Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
7079  }// isCodeGenOnly = 1
7080}// Constraints = "$src1 = $dst"
7081}
7082
// Emits the 213, 231 and 132 scalar FMA forms (NAME#<form>#SUFF#Z) for one
// scalar type `_`, supplying the _.FRC selection patterns for each form.
// Node operand order per form:
//   213: ($src2, $src1, $src3)
//   231: ($src2, $src3, $src1)
//   132: ($src1, $src3, $src2); its load pattern is (load $src3, $src1, $src2)
// The trailing 0/1 argument is MaskOnlyReg of avx512_fma3s_common: 0 for the
// 213 form, 1 for the 231 and 132 forms.
7083multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7084                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7085                            X86VectorVTInfo _, string SUFF> {
7086  let ExeDomain = _.ExeDomain in {
7087  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7088                // Operands for intrinsic are in 123 order to preserve passthru
7089                // semantics.
7090                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7091                         _.FRC:$src3))),
7092                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7093                         (_.ScalarLdFrag addr:$src3)))),
7094                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7095                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
7096
7097  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7098                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7099                                          _.FRC:$src1))),
7100                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7101                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7102                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7103                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
7104
7105  // One pattern is in 312 order so that the load sits in a different position
7106  // than in the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7107  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7108                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7109                         _.FRC:$src2))),
7110                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7111                                 _.FRC:$src1, _.FRC:$src2))),
7112                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7113                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
7114  }
7115}
7116
// Instantiates the scalar FMA forms for each scalar FP type:
//   SS - f32x_info, under HasAVX512, EVEX_CD8<32, CD8VT1>, T8PD.
//   SD - f64x_info, under HasAVX512, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD.
//   SH - f16x_info, under HasFP16,   EVEX_CD8<16, CD8VT1>, T_MAP6PD.
// All three are marked VEX_LIG. The three opcodes are forwarded unchanged to
// avx512_fma3s_all, which emits the 213/231/132 forms.
7117multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7118                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7119  let Predicates = [HasAVX512] in {
7120    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7121                                 OpNodeRnd, f32x_info, "SS">,
7122                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
7123    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7124                                 OpNodeRnd, f64x_info, "SD">,
7125                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
7126  }
7127  let Predicates = [HasFP16] in {
7128    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7129                                 OpNodeRnd, f16x_info, "SH">,
7130                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
7131  }
7132}
7133
// Scalar FMA instruction families. Template arguments, in order: the 213, 231
// and 132 opcode bytes, mnemonic stem, OpNode, OpNodeRnd (see avx512_fma3s).
7134defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7135defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7136defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7137defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
7138
// Selection patterns that map a scalar FMA performed on element 0 of a 128-bit
// vector onto the *_Int scalar FMA instructions.
//
// Every source pattern has the shape
//   Move($src1, scalar_to_vector(<fma expression>))
// where one operand of the fma expression is element 0 of $src1. Three
// families are covered:
//   - unmasked                         -> <form>Z{r,m,rb}_Int
//   - X86selects_mask with element 0 of $src1 as the false value
//                                      -> <form>Z{r,m,rb}_Intk
//   - X86selects_mask with ZeroFP as the false value
//                                      -> <form>Z{r,m,rb}_Intkz
//
// Node operand order that selects each form (x = element 0 of $src1):
//   213: (src2, x, src3)
//   231: (src2, src3, x)
//   132: (x, src3, src2)   -- only load variants are listed for this form
//
// Parameters:
//   Op       - node used by the unmasked patterns.
//   MaskedOp - node used under X86selects_mask.
//   RndOp    - node used by the rounding-mode patterns (extra i32 timm:$rc).
//   Prefix / Suffix - pieces of the instruction record name that is looked up
//                     with !cast<I>(Prefix#<form>#Suffix#"Z...").
//   Move     - the element-0 insert node wrapping the whole pattern.
//   _        - vector type info; ZeroFP - the zero immediate leaf.
//   prd      - predicate guarding all patterns (defaults to HasAVX512).
7139multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7140                                      SDNode RndOp, string Prefix,
7141                                      string Suffix, SDNode Move,
7142                                      X86VectorVTInfo _, PatLeaf ZeroFP,
7143                                      Predicate prd = HasAVX512> {
7144  let Predicates = [prd] in {
7145    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7146                (Op _.FRC:$src2,
7147                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7148                    _.FRC:$src3))))),
7149              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7150               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7151               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7152
7153    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7154                (Op _.FRC:$src2, _.FRC:$src3,
7155                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7156              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7157               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7158               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7159
7160    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7161                (Op _.FRC:$src2,
7162                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7163                    (_.ScalarLdFrag addr:$src3)))))),
7164              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7165               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7166               addr:$src3)>;
7167
7168    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7169                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7170                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7171              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7172               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7173               addr:$src3)>;
7174
7175    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7176                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7177                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7178              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7179               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7180               addr:$src3)>;
7181
    // Merge-masked patterns: the false value of the select is element 0 of $src1.
7182    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7183               (X86selects_mask VK1WM:$mask,
7184                (MaskedOp _.FRC:$src2,
7185                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7186                    _.FRC:$src3),
7187                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7188              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7189               VR128X:$src1, VK1WM:$mask,
7190               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7191               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7192
7193    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7194               (X86selects_mask VK1WM:$mask,
7195                (MaskedOp _.FRC:$src2,
7196                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7197                    (_.ScalarLdFrag addr:$src3)),
7198                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7199              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7200               VR128X:$src1, VK1WM:$mask,
7201               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7202
7203    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7204               (X86selects_mask VK1WM:$mask,
7205                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7206                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7207                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7208              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7209               VR128X:$src1, VK1WM:$mask,
7210               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7211
7212    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7213               (X86selects_mask VK1WM:$mask,
7214                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7215                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7216                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7217              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7218               VR128X:$src1, VK1WM:$mask,
7219               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7220               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7221
7222    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7223               (X86selects_mask VK1WM:$mask,
7224                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7225                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7226                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7227              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7228               VR128X:$src1, VK1WM:$mask,
7229               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7230
    // Zero-masked patterns: the false value of the select is ZeroFP.
7231    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232               (X86selects_mask VK1WM:$mask,
7233                (MaskedOp _.FRC:$src2,
7234                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7235                          _.FRC:$src3),
7236                (_.EltVT ZeroFP)))))),
7237              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7238               VR128X:$src1, VK1WM:$mask,
7239               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7240               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7241
7242    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7243               (X86selects_mask VK1WM:$mask,
7244                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7245                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7246                (_.EltVT ZeroFP)))))),
7247              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7248               VR128X:$src1, VK1WM:$mask,
7249               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7250               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7251
7252    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7253               (X86selects_mask VK1WM:$mask,
7254                (MaskedOp _.FRC:$src2,
7255                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7256                          (_.ScalarLdFrag addr:$src3)),
7257                (_.EltVT ZeroFP)))))),
7258              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7259               VR128X:$src1, VK1WM:$mask,
7260               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7261
    // 132 form: the load is the second node operand and $src2 the third, the
    // same order as the 132 Zm_Int and Zm_Intk patterns above. Writing it as
    // (x, $src2, load) would merely duplicate the commuted 213 load pattern
    // while selecting an instruction with a different operand order.
7262    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7263               (X86selects_mask VK1WM:$mask,
7264                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7265                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7266                (_.EltVT ZeroFP)))))),
7267              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7268               VR128X:$src1, VK1WM:$mask,
7269               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7270
7271    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7272               (X86selects_mask VK1WM:$mask,
7273                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7274                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7275                (_.EltVT ZeroFP)))))),
7276              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7277               VR128X:$src1, VK1WM:$mask,
7278               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7279
7280    // Patterns with rounding mode.
7281    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7282                (RndOp _.FRC:$src2,
7283                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7284                       _.FRC:$src3, (i32 timm:$rc)))))),
7285              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7286               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7287               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7288
7289    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7290                (RndOp _.FRC:$src2, _.FRC:$src3,
7291                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7292                       (i32 timm:$rc)))))),
7293              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7294               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7295               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7296
7297    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7298               (X86selects_mask VK1WM:$mask,
7299                (RndOp _.FRC:$src2,
7300                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7301                       _.FRC:$src3, (i32 timm:$rc)),
7302                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7303              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7304               VR128X:$src1, VK1WM:$mask,
7305               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7306               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7307
7308    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7309               (X86selects_mask VK1WM:$mask,
7310                (RndOp _.FRC:$src2, _.FRC:$src3,
7311                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7312                       (i32 timm:$rc)),
7313                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7314              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7315               VR128X:$src1, VK1WM:$mask,
7316               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7317               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7318
7319    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7320               (X86selects_mask VK1WM:$mask,
7321                (RndOp _.FRC:$src2,
7322                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7323                       _.FRC:$src3, (i32 timm:$rc)),
7324                (_.EltVT ZeroFP)))))),
7325              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7326               VR128X:$src1, VK1WM:$mask,
7327               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7328               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7329
7330    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7331               (X86selects_mask VK1WM:$mask,
7332                (RndOp _.FRC:$src2, _.FRC:$src3,
7333                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7334                       (i32 timm:$rc)),
7335                (_.EltVT ZeroFP)))))),
7336              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7337               VR128X:$src1, VK1WM:$mask,
7338               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7339               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7340  }
7341}
// Instantiate the scalar FMA selection patterns for each operation
// (VFMADD, VFMSUB, VFNMADD, VFNMSUB) and each scalar element type:
//   SH - X86Movsh, v8f16x_info, fp16imm0, gated on HasFP16
//   SS - X86Movss, v4f32x_info, fp32imm0 (default predicate)
//   SD - X86Movsd, v2f64x_info, fp64imm0 (default predicate)
7342defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7343                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7344defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7345                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7346defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7347                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7348defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7349                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7350
7351defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7352                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7353defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7354                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7355defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7356                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7357defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7358                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7359
7360defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7361                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7362defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7363                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7364defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7365                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7366defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7367                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7368
7369//===----------------------------------------------------------------------===//
7370// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7371//===----------------------------------------------------------------------===//
// 52-bit multiply-add for a single vector type `_`, with $src1 tied to $dst.
// Defines three variants via AVX512_maskable_3src, all tagged T8PD, EVEX_4V:
//   r  - register $src3
//   m  - full-vector load of $src3 (_.LdFrag)
//   mb - broadcast load of $src3 (_.BroadcastLdFrag), marked EVEX_B
// NOTE(review): only the r variant passes the trailing "1, 1" arguments; their
// meaning is defined by AVX512_maskable_3src, which is outside this excerpt.
7372let Constraints = "$src1 = $dst" in {
7373multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7374                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7375  // NOTE: The SDNode has the multiply operands first and the add operand last.
7376  // This enables commuted load patterns to be autogenerated by tablegen.
7377  let ExeDomain = _.ExeDomain in {
7378  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7379          (ins _.RC:$src2, _.RC:$src3),
7380          OpcodeStr, "$src3, $src2", "$src2, $src3",
7381          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7382          T8PD, EVEX_4V, Sched<[sched]>;
7383
7384  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7385          (ins _.RC:$src2, _.MemOp:$src3),
7386          OpcodeStr, "$src3, $src2", "$src2, $src3",
7387          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7388          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7389
7390  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7391            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7392            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7393            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7394            (OpNode _.RC:$src2,
7395                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7396                    _.RC:$src1)>,
7397            T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7398  }
7399}
7400} // Constraints = "$src1 = $dst"
7401
// Instantiates avx512_pmadd52_rm at all three vector widths.
//   Z     uses _.info512 and sched.ZMM, predicated on HasIFMA.
//   Z256  uses _.info256 and sched.YMM, predicated on HasVLX and HasIFMA.
//   Z128  uses _.info128 and sched.XMM, predicated on HasVLX and HasIFMA.
// Each width takes its EVEX_CD8 element size from its own info record.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in
  defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                  _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                  _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
7415
// The two IFMA instructions.  Both go through avx512_pmadd52_common with
// SchedWriteVecIMul, avx512vl_i64_info and VEX_W; they differ only in opcode
// (0xb4 vs 0xb5), mnemonic, and selection node (x86vpmadd52l vs x86vpmadd52h).
7416defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7417                                         SchedWriteVecIMul, avx512vl_i64_info>,
7418                                         VEX_W;
7419defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7420                                         SchedWriteVecIMul, avx512vl_i64_info>,
7421                                         VEX_W;
7422
7423//===----------------------------------------------------------------------===//
7424// AVX-512  Scalar convert from signed/unsigned integer to float/double
7425//===----------------------------------------------------------------------===//
7426
// Scalar integer-to-FP conversion without an explicit rounding operand.
//
// Records produced:
//   rr, rm          codegen-only forms on DstVT.FRC with empty pattern lists
//                   (hasSideEffects = 0; rm is also marked mayLoad).
//   rr_Int, rm_Int  forms on DstVT.RC selected through OpNode; rm_Int loads
//                   its integer source with ld_frag.
//   (alias)         AT&T-only InstAlias spelling "v" # asm # mem for rr_Int.
//
// `mem` is the size suffix: it is shown as an optional "{mem}" part of the
// memory-form mnemonics and appended verbatim in the alias.  `_Uses` and
// `_mayRaiseFPException` default to [MXCSR] and 1, and are applied to every
// instruction record but not to the alias.
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
                    string mem, list<Register> _Uses = [MXCSR],
                    bit _mayRaiseFPException = 1> {
  let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
      mayRaiseFPException = _mayRaiseFPException in {
    let hasSideEffects = 0, isCodeGenOnly = 1 in {
      def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, SrcRC:$src),
                  asm # "\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

      let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, x86memop:$src),
                  asm # "{" # mem # "}\t{$src, $src1, $dst|$dst, $src1, $src}",
                  []>,
               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    } // hasSideEffects = 0, isCodeGenOnly = 1

    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, SrcRC:$src2),
                    asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, x86memop:$src2),
                    asm # "{" # mem # "}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  (ld_frag addr:$src2)))]>,
                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  def : InstAlias<"v" # asm # mem # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}
7464
// Scalar integer-to-FP conversion with an explicit rounding-control operand.
//
// Defines rrb_Int, which takes an extra AVX512RC:$rc operand, is selected
// through OpNode with $rc as an i32 target immediate, and adds EVEX_B and
// EVEX_RC.  It lists MXCSR in Uses.  An AT&T-only InstAlias spelling
// "v" # asm # mem is also emitted for it.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in {
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                     (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                     asm # "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                     [(set DstVT.RC:$dst,
                           (OpNode (DstVT.VT DstVT.RC:$src1),
                                   SrcRC:$src2,
                                   (i32 timm:$rc)))]>,
                  EVEX_4V, EVEX_B, EVEX_RC,
                  Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  }

  def : InstAlias<"v" # asm # mem # "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}
7483
// Combines the rounding-control form (avx512_vcvtsi_round, selected through
// OpNodeRnd) with the plain forms (avx512_vcvtsi, selected through OpNode)
// under one NAME, and marks the result VEX_LIG.  avx512_vcvtsi is used with
// its default _Uses and _mayRaiseFPException arguments.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT,
                                  asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT,
                            x86memop, ld_frag, asm, mem>,
              VEX_LIG;
}
7493
// Scalar integer -> FP conversion instructions, assembler aliases and
// selection patterns, all predicated on HasAVX512.
//
// Signed conversions use opcode 0x2A, unsigned conversions use opcode 0x7B.
// The 32-bit-source -> f64 forms (VCVTSI2SDZ, VCVTUSI2SDZ) instantiate
// avx512_vcvtsi directly with null_frag, an empty Uses list and
// mayRaiseFPException = 0, so they get no rrb_Int (rounding-control) record.
// NOTE(review): presumably because every 32-bit integer is exactly
// representable in f64 - confirm.
7494let Predicates = [HasAVX512] in {
7495defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7496                                 WriteCvtI2SS, GR32,
7497                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7498                                 XS, EVEX_CD8<32, CD8VT1>;
7499defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7500                                 WriteCvtI2SS, GR64,
7501                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7502                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7503defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7504                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7505                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7506defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7507                                 WriteCvtI2SD, GR64,
7508                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7509                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7510
// AT&T-only aliases: the suffix-less mnemonic with a memory operand maps to
// the i32mem rm_Int form.
7511def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7512              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7513def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7514              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7515
// Select any_sint_to_fp of a loaded integer to the codegen-only rm forms.
// The first (pass-through) source operand is an IMPLICIT_DEF.
7516def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7517          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7518def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7519          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7520def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7521          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7522def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7523          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7524
// Same, for a GR32/GR64 register source and the codegen-only rr forms.
7525def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7526          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7527def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7528          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7529def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7530          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7531def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7532          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7533
// Unsigned counterparts of the definitions above.
7534defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7535                                  WriteCvtI2SS, GR32,
7536                                  v4f32x_info, i32mem, loadi32,
7537                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7538defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7539                                  WriteCvtI2SS, GR64,
7540                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7541                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7542defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7543                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7544                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7545defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7546                                  WriteCvtI2SD, GR64,
7547                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7548                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7549
// AT&T-only aliases for the suffix-less unsigned mnemonics with a memory
// operand; both map to the i32mem rm_Int form.
7550def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7551              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7552def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7553              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7554
// Select any_uint_to_fp of a loaded integer to the codegen-only rm forms.
7555def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7556          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7557def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7558          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7559def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7560          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7561def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7562          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7563
// Same, for a GR32/GR64 register source and the codegen-only rr forms.
7564def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7565          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7566def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7567          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7568def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7569          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7570def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7571          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7572}
7573
7574//===----------------------------------------------------------------------===//
7575// AVX-512  Scalar convert from float/double to integer
7576//===----------------------------------------------------------------------===//
7577
// Scalar FP-to-integer conversion on the vector register classes.
//
// Records produced (under Predicates = [prd], which defaults to HasAVX512):
//   rr_Int   register source, selected through OpNode, SIMD_EXC.
//   rrb_Int  register source plus AVX512RC:$rc, selected through OpNodeRnd;
//            adds EVEX_B and EVEX_RC and lists MXCSR in Uses.
//   rm_Int   SrcVT.IntScalarMemOp source matched with
//            SrcVT.ScalarIntMemFrags, SIMD_EXC.
// Three AT&T-only aliases spelling "v" # asm # aliasStr are emitted, one for
// each record; they sit outside the predicate scope.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr, Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins SrcVT.RC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst,
                          (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                     (ins SrcVT.RC:$src, AVX512RC:$rc),
                     asm # "\t{$rc, $src, $dst|$dst, $src, $rc}",
                     [(set DstVT.RC:$dst,
                           (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),
                                      (i32 timm:$rc)))]>,
                  EVEX, VEX_LIG, EVEX_B, EVEX_RC, Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins SrcVT.IntScalarMemOp:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst,
                          (OpNode (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
                 SIMD_EXC;
  } // Predicates = [prd]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int")
                       DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
                  (!cast<Instruction>(NAME # "rrb_Int")
                       DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int")
                       DstVT.RC:$dst, SrcVT.IntScalarMemOp:$src), 0, "att">;
}
7609
7610// Convert float/double to signed/unsigned int 32/64
// All eight go through avx512_cvt_s_int_round with the default predicate.
// Signed forms use opcode 0x2D with X86cvts2si/X86cvts2siRnd; unsigned forms
// use opcode 0x79 with X86cvts2usi/X86cvts2usiRnd.  The i64x_info destinations
// add VEX_W and use the "{q}" alias suffix; i32x_info destinations use "{l}".
// EVEX_CD8 follows the source element size (32 for f32x_info sources, 64 for
// f64x_info sources).
7611defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7612                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7613                                   XS, EVEX_CD8<32, CD8VT1>;
7614defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7615                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7616                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7617defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7618                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7619                                   XS, EVEX_CD8<32, CD8VT1>;
7620defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7621                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7622                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7623defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7624                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7625                                   XD, EVEX_CD8<64, CD8VT1>;
7626defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7627                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7628                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7629defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7630                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7631                                   XD, EVEX_CD8<64, CD8VT1>;
7632defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7633                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7634                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7635
// Codegen-only scalar FP-to-integer conversion on the scalar FP register
// class (SrcVT.FRC).  Defines rr (register source) and rm (source loaded with
// SrcVT.ScalarLdFrag), both selected through OpNode, predicated on HasAVX512
// and carrying SIMD_EXC.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain,
      isCodeGenOnly = 1 in {
    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins SrcVT.FRC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
             EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;

    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins SrcVT.ScalarMemOp:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst,
                          (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
             EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
             SIMD_EXC;
  } // Predicates = [HasAVX512], isCodeGenOnly = 1
}
7652
// Codegen-only rr/rm forms that select lrint (i32x_info destinations) and
// llrint (i64x_info destinations, VEX_W) to opcode 0x2D.  These reuse the
// VCVTSS2SIZ/VCVTSS2SI64Z/VCVTSD2SIZ/VCVTSD2SI64Z prefixes of the
// avx512_cvt_s_int_round instantiations; the records differ by suffix
// (rr/rm here versus rr_Int/rrb_Int/rm_Int there).
7653defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7654                       lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
7655defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7656                       llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7657defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7658                       lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
7659defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7660                       llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7661
// lrint with an i64 result.  The 64-bit-destination instructions are defined
// with llrint as their own pattern operator, so these extra patterns route
// an i64-typed lrint to the same rr/rm records, for both register and loaded
// f32/f64 sources.
7662let Predicates = [HasAVX512] in {
7663  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7664  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7665
7666  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7667  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7668}
7669
7670// Patterns used for matching vcvt{u}si2s{s,d} intrinsic sequences from clang
7671// which produce unnecessary vmovs{s,d} instructions
//
// Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of an
// integer -> FP conversion and selects the corresponding rr_Int/rm_Int record
// directly, with $dst as its first source operand.  There is one pattern for
// every combination of signedness (any_sint_to_fp / any_uint_to_fp), result
// type (f32 / f64), source width (32 / 64 bits) and source kind (register /
// load).
7672let Predicates = [HasAVX512] in {
7673def : Pat<(v4f32 (X86Movss
7674                   (v4f32 VR128X:$dst),
7675                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7676          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7677
7678def : Pat<(v4f32 (X86Movss
7679                   (v4f32 VR128X:$dst),
7680                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7681          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7682
7683def : Pat<(v4f32 (X86Movss
7684                   (v4f32 VR128X:$dst),
7685                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7686          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7687
7688def : Pat<(v4f32 (X86Movss
7689                   (v4f32 VR128X:$dst),
7690                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7691          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7692
7693def : Pat<(v2f64 (X86Movsd
7694                   (v2f64 VR128X:$dst),
7695                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7696          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7697
7698def : Pat<(v2f64 (X86Movsd
7699                   (v2f64 VR128X:$dst),
7700                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7701          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7702
7703def : Pat<(v2f64 (X86Movsd
7704                   (v2f64 VR128X:$dst),
7705                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7706          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7707
7708def : Pat<(v2f64 (X86Movsd
7709                   (v2f64 VR128X:$dst),
7710                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7711          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7712
7713def : Pat<(v4f32 (X86Movss
7714                   (v4f32 VR128X:$dst),
7715                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7716          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7717
7718def : Pat<(v4f32 (X86Movss
7719                   (v4f32 VR128X:$dst),
7720                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7721          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7722
7723def : Pat<(v4f32 (X86Movss
7724                   (v4f32 VR128X:$dst),
7725                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7726          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7727
7728def : Pat<(v4f32 (X86Movss
7729                   (v4f32 VR128X:$dst),
7730                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7731          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7732
7733def : Pat<(v2f64 (X86Movsd
7734                   (v2f64 VR128X:$dst),
7735                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7736          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7737
7738def : Pat<(v2f64 (X86Movsd
7739                   (v2f64 VR128X:$dst),
7740                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7741          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7742
7743def : Pat<(v2f64 (X86Movsd
7744                   (v2f64 VR128X:$dst),
7745                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7746          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7747
7748def : Pat<(v2f64 (X86Movsd
7749                   (v2f64 VR128X:$dst),
7750                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7751          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7752} // Predicates = [HasAVX512]
7753
// Convert float/double to signed/unsigned int 32/64 with truncation.
//
// Records produced (under Predicates = [prd], which defaults to HasAVX512):
//   rr, rm    codegen-only forms on _SrcRC.FRC / _SrcRC.ScalarLdFrag,
//             selected through OpNode, SIMD_EXC.
//   rr_Int    vector-register source, selected through OpNodeInt, SIMD_EXC.
//   rrb_Int   vector-register source with "{sae}" in the assembly string,
//             selected through OpNodeSAE; adds EVEX_B and lists MXCSR in Uses.
//   rm_Int    _SrcRC.IntScalarMemOp source matched with
//             _SrcRC.ScalarIntMemFrags, selected through OpNodeInt, SIMD_EXC.
// Three AT&T-only aliases spelling asm # aliasStr are emitted for the _Int
// records; they sit outside the predicate scope.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.FRC:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;

      def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.ScalarMemOp:$src),
                      asm # "\t{$src, $dst|$dst, $src}",
                      [(set _DstRC.RC:$dst,
                            (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
               SIMD_EXC;
    } // isCodeGenOnly = 1

    def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.RC:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                         (ins _SrcRC.RC:$src),
                         asm # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                         [(set _DstRC.RC:$dst,
                               (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;

    def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                        (ins _SrcRC.IntScalarMemOp:$src),
                        asm # "\t{$src, $dst|$dst, $src}",
                        [(set _DstRC.RC:$dst,
                              (OpNodeInt
                                  (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
                 SIMD_EXC;
  } // Predicates = [prd]

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int")
                       _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                  (!cast<Instruction>(NAME # "rrb_Int")
                       _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int")
                       _DstRC.RC:$dst, _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
7797
// Truncating scalar FP -> integer conversions, all through avx512_cvt_s_all
// with the default predicate.  Signed forms use opcode 0x2C with
// any_fp_to_sint / X86cvtts2Int / X86cvtts2IntSAE; unsigned forms use opcode
// 0x78 with any_fp_to_uint / X86cvtts2UInt / X86cvtts2UIntSAE.  The i64x_info
// destinations add VEX_W and use the "{q}" alias suffix; i32x_info
// destinations use "{l}".
7798defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7799                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7800                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7801defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7802                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7803                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7804defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7805                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7806                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7807defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7808                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7809                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7810
7811defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7812                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7813                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7814defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7815                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7816                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7817defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7818                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7819                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7820defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7821                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7822                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7823
7824//===----------------------------------------------------------------------===//
7825// AVX-512  Convert from float to double and back
7826//===----------------------------------------------------------------------===//
7827
// Scalar FP-to-FP conversion from _Src to _, without rounding control or SAE.
//
// Records produced (all under Uses = [MXCSR], mayRaiseFPException = 1):
//   rr_Int, rm_Int  maskable forms on the vector register classes, built with
//                   AVX512_maskable_scalar and selected through OpNode;
//                   rm_Int matches its source with _Src.ScalarIntMemFrags.
//   rr, rm          codegen-only forms on _.FRC / _Src.FRC with empty pattern
//                   lists (hasSideEffects = 0; rm is also marked mayLoad).
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<
                    opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.RC:$src2)))>,
                EVEX_4V, VEX_LIG, Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<
                    opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
                EVEX_4V, VEX_LIG,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             EVEX_4V, VEX_LIG, Sched<[sched]>;

    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // isCodeGenOnly = 1, hasSideEffects = 0
}
7858
// Scalar Conversion with SAE - suppress all exceptions.
// Defines the maskable rrb_Int form: register sources only, "{sae}" in both
// assembly operand strings, selected through OpNodeSAE, with EVEX_B set and
// MXCSR listed in Uses.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in {
    defm rrb_Int : AVX512_maskable_scalar<
                       opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                       "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                       (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                        (_Src.VT _Src.RC:$src2)))>,
                   EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
  }
}
7871
// Scalar Conversion with rounding control (RC).
// Defines the maskable rrb_Int form: register sources plus an AVX512RC:$rc
// operand, selected through OpNodeRnd with $rc as an i32 target immediate,
// with EVEX_B and EVEX_RC set and MXCSR listed in Uses.
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in {
    defm rrb_Int : AVX512_maskable_scalar<
                       opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc),
                       OpcodeStr,
                       "$rc, $src2, $src1", "$src1, $src2, $rc",
                       (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                        (_Src.VT _Src.RC:$src2),
                                        (i32 timm:$rc)))>,
                   EVEX_4V, VEX_LIG, Sched<[sched]>,
                   EVEX_B, EVEX_RC;
  }
}
// Scalar FP conversion paired with a rounding-control form.
// Emits `Z` by combining avx512_cvt_fp_scalar (OpNode) with
// avx512_cvt_fp_rc_scalar (OpNodeRnd), converting from _src to _dst.
// EVEX_CD8 uses the source element size; the predicate defaults to HasAVX512
// and the execution domain is SSEPackedSingle.
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in
  defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
           avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd,
                                   sched>,
           EVEX_CD8<_src.EltSize, CD8VT1>;
}
7896
// Scalar FP conversion paired with an SAE form.
// Emits `Z` by combining avx512_cvt_fp_scalar (OpNode) with
// avx512_cvt_fp_sae_scalar (OpNodeSAE), converting from _src to _dst.
// EVEX_CD8 uses the source element size; the predicate defaults to HasAVX512
// and the execution domain is SSEPackedSingle.
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in
  defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
           avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE,
                                    sched>,
           EVEX_CD8<_src.EltSize, CD8VT1>;
}
// Scalar FP <-> FP conversions.  Conversions to a smaller element type use
// avx512_cvt_fp_scalar_trunc (X86frounds / X86froundsRnd, WriteCvtSD2SS);
// conversions to a larger element type use avx512_cvt_fp_scalar_extend
// (X86fpexts / X86fpextsSAE, WriteCvtSS2SD).  The four forms involving
// f16x_info pass HasFP16 as the predicate and use the T_MAP5*/T_MAP6* opcode
// maps; VCVTSD2SS and VCVTSS2SD keep the default HasAVX512 predicate.
7908defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7909                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7910                                         f32x_info>, XD, VEX_W;
7911defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7912                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7913                                          f64x_info>, XS;
7914defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7915                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7916                                          f16x_info, HasFP16>, T_MAP5XD, VEX_W;
7917defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7918                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7919                                          f64x_info, HasFP16>, T_MAP5XS;
7920defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7921                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7922                                          f16x_info, HasFP16>, T_MAP5PS;
7923defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7924                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7925                                          f32x_info, HasFP16>, T_MAP6PS;
7926
// Selection patterns for plain scalar f32 <-> f64 conversions. The first
// instruction operand is filled with IMPLICIT_DEF; only the converted source
// comes from the matched DAG.
// The load-folding (rm) form is only selected when OptForSize is also required.
// NOTE(review): there is no rm pattern for the f64 -> f32 round here -- confirm
// that this is intentional.
7927def : Pat<(f64 (any_fpextend FR32X:$src)),
7928          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7929          Requires<[HasAVX512]>;
7930def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7931          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7932          Requires<[HasAVX512, OptForSize]>;
7933
7934def : Pat<(f32 (any_fpround FR64X:$src)),
7935          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7936           Requires<[HasAVX512]>;
7937
// FP16 scalar conversion patterns, all requiring HasFP16. As with the
// f32/f64 patterns, the first instruction operand is IMPLICIT_DEF and the
// load-folding (rm) forms additionally require OptForSize.

// f16 -> f32 extend.
7938def : Pat<(f32 (any_fpextend FR16X:$src)),
7939          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7940          Requires<[HasFP16]>;
7941def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7942          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7943          Requires<[HasFP16, OptForSize]>;
7944
// f16 -> f64 extend.
7945def : Pat<(f64 (any_fpextend FR16X:$src)),
7946          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7947          Requires<[HasFP16]>;
7948def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7949          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7950          Requires<[HasFP16, OptForSize]>;
7951
// f32 -> f16 and f64 -> f16 rounds (register forms only).
7952def : Pat<(f16 (any_fpround FR32X:$src)),
7953          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7954           Requires<[HasFP16]>;
7955def : Pat<(f16 (any_fpround FR64X:$src)),
7956          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7957           Requires<[HasFP16]>;
7958
// Match "convert element 0 of $src, then insert the result into $dst via
// X86Movss" and select the _Int instruction form, which takes both vector
// registers directly.
7959def : Pat<(v4f32 (X86Movss
7960                   (v4f32 VR128X:$dst),
7961                   (v4f32 (scalar_to_vector
7962                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7963          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7964          Requires<[HasAVX512]>;
7965
// Same shape for the f32 -> f64 extend, merged with X86Movsd.
7966def : Pat<(v2f64 (X86Movsd
7967                   (v2f64 VR128X:$dst),
7968                   (v2f64 (scalar_to_vector
7969                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7970          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7971          Requires<[HasAVX512]>;
7972
7973//===----------------------------------------------------------------------===//
7974// AVX-512  Vector convert from signed/unsigned integer to float/double
7975//          and from float/double to signed/unsigned integer
7976//===----------------------------------------------------------------------===//
7977
// Common vector conversion multiclass. Defines three maskable forms, each via
// AVX512_maskable_cvt:
//   rr  - register source.
//   rm  - full-width memory source (MemOp).
//   rmb - broadcast memory source (_Src.ScalarMemOp, EVEX_B).
// Parameters:
//   _ / _Src    - destination / source type info.
//   OpNode      - node used in the unmasked pattern.
//   MaskOpNode  - node used inside the vselect_mask (merge and zero) patterns.
//   Broadcast   - string appended to "${src}" in the rmb assembly operands.
//   Alias       - string appended to OpcodeStr for the rm form only.
//   MemOp       - memory operand of the rm form (default _Src.MemOp).
//   MaskRC      - write-mask register class (default _.KRCWM).
//   LdDAG / MaskLdDAG - unmasked / masked source DAGs for the rm form; the
//                 defaults wrap _Src.LdFrag in OpNode / MaskOpNode.
// All three forms are defined with Uses = [MXCSR] and mayRaiseFPException = 1.
7978multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7979                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7980                          X86FoldableSchedWrite sched,
7981                          string Broadcast = _.BroadcastStr,
7982                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7983                          RegisterClass MaskRC = _.KRCWM,
7984                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7985                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7986let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
7987  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7988                         (ins _Src.RC:$src),
7989                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7990                         (ins MaskRC:$mask, _Src.RC:$src),
7991                          OpcodeStr, "$src", "$src",
7992                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7993                         (vselect_mask MaskRC:$mask,
7994                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7995                                       _.RC:$src0),
7996                         (vselect_mask MaskRC:$mask,
7997                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7998                                       _.ImmAllZerosV)>,
7999                         EVEX, Sched<[sched]>;
8000
  // Full-width memory source; mnemonic is OpcodeStr#Alias and the patterns
  // come from LdDAG / MaskLdDAG.
8001  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8002                         (ins MemOp:$src),
8003                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8004                         (ins MaskRC:$mask, MemOp:$src),
8005                         OpcodeStr#Alias, "$src", "$src",
8006                         LdDAG,
8007                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
8008                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
8009                         EVEX, Sched<[sched.Folded]>;
8010
  // Broadcast memory source; always matches _Src.BroadcastLdFrag, independent
  // of LdDAG / MaskLdDAG.
8011  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8012                         (ins _Src.ScalarMemOp:$src),
8013                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8014                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8015                         OpcodeStr,
8016                         "${src}"#Broadcast, "${src}"#Broadcast,
8017                         (_.VT (OpNode (_Src.VT
8018                                  (_Src.BroadcastLdFrag addr:$src))
8019                            )),
8020                         (vselect_mask MaskRC:$mask,
8021                                       (_.VT
8022                                        (MaskOpNode
8023                                         (_Src.VT
8024                                          (_Src.BroadcastLdFrag addr:$src)))),
8025                                       _.RC:$src0),
8026                         (vselect_mask MaskRC:$mask,
8027                                       (_.VT
8028                                        (MaskOpNode
8029                                         (_Src.VT
8030                                          (_Src.BroadcastLdFrag addr:$src)))),
8031                                       _.ImmAllZerosV)>,
8032                         EVEX, EVEX_B, Sched<[sched.Folded]>;
8033  }
8034}
8035// Conversion with SAE - suppress all exceptions
// Defines a single register-only "rrb" form through AVX512_maskable. The
// assembly operand strings carry a literal "{sae}" and the record is tagged
// EVEX_B. Only Uses = [MXCSR] is set here; unlike avx512_vcvt_fp this
// multiclass does not set mayRaiseFPException.
8036multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8037                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
8038                              X86FoldableSchedWrite sched> {
8039  let Uses = [MXCSR] in
8040  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8041                        (ins _Src.RC:$src), OpcodeStr,
8042                        "{sae}, $src", "$src, {sae}",
8043                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8044                        EVEX, EVEX_B, Sched<[sched]>;
8045}
8046
8047// Conversion with rounding control (RC)
// Defines a single register-only "rrb" form through AVX512_maskable. The
// rounding mode is an extra AVX512RC:$rc input operand that the pattern hands
// to OpNodeRnd as (i32 timm:$rc). The record is tagged EVEX_B and EVEX_RC.
8048multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8049                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8050                         X86FoldableSchedWrite sched> {
8051  let Uses = [MXCSR] in
8052  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8053                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8054                        "$rc, $src", "$src, $rc",
8055                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8056                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8057}
8058
8059// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Has no body of its own: it only inherits avx512_vcvt_fp, supplying the
// PatFrag named "extload" # _Src.VTName as both LdDAG and MaskLdDAG. The same
// fragment is therefore used for the unmasked and the masked rm patterns, and
// neither OpNode nor MaskOpNode appears in them. The rr and rmb forms are
// unchanged from avx512_vcvt_fp.
8060multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8061                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
8062                                SDNode MaskOpNode,
8063                                X86FoldableSchedWrite sched,
8064                                string Broadcast = _.BroadcastStr,
8065                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8066                                RegisterClass MaskRC = _.KRCWM>
8067  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8068                   Alias, MemOp, MaskRC,
8069                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8070                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8071
8072// Extend [Float to Double, Half to Float]
// Source/destination pairing per vector length:
//   Z    - _src.info256 -> _dst.info512, plus the SAE form (X86vfpextSAE).
//   Z128 - _src.info128 -> _dst.info128, using X86any_vfpext / X86vfpext, the
//          destination's broadcast string and an explicit f64mem operand.
//   Z256 - _src.info128 -> _dst.info256.
// Z is guarded by prd alone; Z128 and Z256 additionally require HasVLX.
8073multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8074                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8075                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8076  let Predicates = [prd] in {
8077    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
8078                            any_fpextend, fpextend, sched.ZMM>,
8079             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8080                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8081  }
8082  let Predicates = [prd, HasVLX] in {
8083    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8084                               X86any_vfpext, X86vfpext, sched.XMM,
8085                               _dst.info128.BroadcastStr,
8086                               "", f64mem>, EVEX_V128;
8087    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8088                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8089  }
8090}
8091
8092// Truncate [Double to Float, Float to Half]
// Source/destination pairing per vector length:
//   Z    - _src.info512 -> _dst.info256, plus the rounding-control form
//          (X86vfproundRnd).
//   Z128 - _src.info128 -> _dst.info128, rm mnemonic suffixed "{x}", f128mem.
//   Z256 - _src.info256 -> _dst.info128, rm mnemonic suffixed "{y}".
// bcast128, loadVT128 and maskRC128 default to the 128-bit source's broadcast
// fragment, load fragment and write-mask class; they are used by the Z128
// definition and its hand-written patterns.
// The InstAlias records at the end are declared outside both Predicates
// scopes.
8093multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8094                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8095                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8096                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8097                            PatFrag loadVT128 = _src.info128.LdFrag,
8098                            RegisterClass maskRC128 = _src.info128.KRCWM> {
8099  let Predicates = [prd] in {
8100    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8101                            X86any_vfpround, X86vfpround, sched.ZMM>,
8102             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8103                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8104  }
8105  let Predicates = [prd, HasVLX] in {
8106    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8107                               null_frag, null_frag, sched.XMM,
8108                               _src.info128.BroadcastStr, "{x}",
8109                               f128mem, maskRC128>, EVEX_V128;
8110    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8111                               X86any_vfpround, X86vfpround,
8112                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8113
8114    // Special patterns to allow use of X86vmfpround for masking. Instruction
8115    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked (rrk), zero-masked (rrkz).
8116    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8117              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8118    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8119                            maskRC128:$mask),
8120              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8121    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8122                            maskRC128:$mask),
8123              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8124
    // Full-width load source (loadVT128).
8125    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8126              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8127    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8128                            maskRC128:$mask),
8129              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8130    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8131                            maskRC128:$mask),
8132              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8133
    // Broadcast load source (bcast128).
8134    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8135              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8136    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8137                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8138              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8139    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8140                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
8141              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8142  }
8143
  // "x"-suffixed mnemonic aliases for the Z128 register and broadcast forms.
  // Each alias passes 0 and "att" as its trailing InstAlias arguments.
  // NOTE(review): the aliases hard-code VK2WM, f64mem and {1to2} (and
  // VK4WM / {1to4} for the "y" group) instead of using maskRC128 or the
  // _src broadcast string -- confirm this is correct for every instantiation,
  // e.g. ones whose 128-bit source does not have two elements.
8144  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8145                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8146  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8147                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8148                  VK2WM:$mask, VR128X:$src), 0, "att">;
8149  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8150                  "$dst {${mask}} {z}, $src}",
8151                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8152                  VK2WM:$mask, VR128X:$src), 0, "att">;
8153  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8154                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8155  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8156                  "$dst {${mask}}, ${src}{1to2}}",
8157                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8158                  VK2WM:$mask, f64mem:$src), 0, "att">;
8159  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8160                  "$dst {${mask}} {z}, ${src}{1to2}}",
8161                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8162                  VK2WM:$mask, f64mem:$src), 0, "att">;
8163
  // "y"-suffixed mnemonic aliases for the Z256 register and broadcast forms.
8164  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8165                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8166  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8167                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8168                  VK4WM:$mask, VR256X:$src), 0, "att">;
8169  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8170                  "$dst {${mask}} {z}, $src}",
8171                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8172                  VK4WM:$mask, VR256X:$src), 0, "att">;
8173  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8174                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8175  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8176                  "$dst {${mask}}, ${src}{1to4}}",
8177                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8178                  VK4WM:$mask, f64mem:$src), 0, "att">;
8179  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8180                  "$dst {${mask}} {z}, ${src}{1to4}}",
8181                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8182                  VK4WM:$mask, f64mem:$src), 0, "att">;
8183}
8184
// Packed f64 -> f32 truncate and f32 -> f64 extend. Both multiclasses take
// the destination info first and the source info second, and both use the
// default HasAVX512 predicate here.
8185defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8186                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8187                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
8188defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8189                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8190                                   PS, EVEX_CD8<32, CD8VH>;
8191
8192// Extend Half to Double
// Every vector length uses v8f16x_info as the source type info:
//   Z    - destination v8f64_info, plus the SAE form and an extra pattern
//          selecting the Zrm record for (v8f64 (extloadv8f16 ...)).
//   Z128 - destination v2f64x_info, broadcast "{1to2}", f32mem operand.
//   Z256 - destination v4f64x_info, broadcast "{1to4}", f64mem operand.
// Z requires HasFP16; Z128 and Z256 additionally require HasVLX.
8193multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8194                            X86SchedWriteWidths sched> {
8195  let Predicates = [HasFP16] in {
8196    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8197                                  any_fpextend, fpextend, sched.ZMM>,
8198             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8199                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8200    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8201                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8202  }
8203  let Predicates = [HasFP16, HasVLX] in {
8204    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8205                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8206                                     f32mem>, EVEX_V128;
8207    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8208                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8209                                     f64mem>, EVEX_V256;
8210  }
8211}
8212
8213// Truncate Double to Half
// Every vector length uses v8f16x_info as the destination type info, so the
// rm mnemonics get "{z}" / "{x}" / "{y}" suffixes and the broadcast strings
// are passed explicitly:
//   Z    - source v8f64_info, plus the rounding-control form.
//   Z128 - source v2f64x_info, patterns disabled with null_frag, VK2WM mask.
//   Z256 - source v4f64x_info, patterns disabled with null_frag, VK4WM mask.
// This multiclass defines no selection patterns for the null_frag forms.
// The InstAlias records are declared outside both Predicates scopes.
8214multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8215  let Predicates = [HasFP16] in {
8216    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8217                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8218             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8219                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8220  }
8221  let Predicates = [HasFP16, HasVLX] in {
8222    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8223                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8224                               VK2WM>, EVEX_V128;
8225    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8226                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8227                               VK4WM>, EVEX_V256;
8228  }
  // "x"-suffixed aliases for the Z128 register and broadcast forms. Memory
  // operands in all alias groups below are written as i64mem.
8229  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8230                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8231                  VR128X:$src), 0, "att">;
8232  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8233                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8234                  VK2WM:$mask, VR128X:$src), 0, "att">;
8235  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8236                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8237                  VK2WM:$mask, VR128X:$src), 0, "att">;
8238  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8239                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8240                  i64mem:$src), 0, "att">;
8241  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8242                  "$dst {${mask}}, ${src}{1to2}}",
8243                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8244                  VK2WM:$mask, i64mem:$src), 0, "att">;
8245  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8246                  "$dst {${mask}} {z}, ${src}{1to2}}",
8247                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8248                  VK2WM:$mask, i64mem:$src), 0, "att">;
8249
  // "y"-suffixed aliases for the Z256 register and broadcast forms.
8250  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8251                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8252                  VR256X:$src), 0, "att">;
8253  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8254                  "$dst {${mask}}, $src}",
8255                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8256                  VK4WM:$mask, VR256X:$src), 0, "att">;
8257  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8258                  "$dst {${mask}} {z}, $src}",
8259                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8260                  VK4WM:$mask, VR256X:$src), 0, "att">;
8261  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8262                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8263                  i64mem:$src), 0, "att">;
8264  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8265                  "$dst {${mask}}, ${src}{1to4}}",
8266                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8267                  VK4WM:$mask, i64mem:$src), 0, "att">;
8268  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8269                  "$dst {${mask}} {z}, ${src}{1to4}}",
8270                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8271                  VK4WM:$mask, i64mem:$src), 0, "att">;
8272
  // "z"-suffixed aliases for the 512-bit (Z) register and broadcast forms.
8273  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8274                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8275                  VR512:$src), 0, "att">;
8276  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8277                  "$dst {${mask}}, $src}",
8278                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8279                  VK8WM:$mask, VR512:$src), 0, "att">;
8280  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8281                  "$dst {${mask}} {z}, $src}",
8282                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8283                  VK8WM:$mask, VR512:$src), 0, "att">;
8284  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8285                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8286                  i64mem:$src), 0, "att">;
8287  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8288                  "$dst {${mask}}, ${src}{1to8}}",
8289                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8290                  VK8WM:$mask, i64mem:$src), 0, "att">;
8291  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8292                  "$dst {${mask}} {z}, ${src}{1to8}}",
8293                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8294                  VK8WM:$mask, i64mem:$src), 0, "att">;
8295}
8296
// FP16 packed conversion instantiations. The first two reuse the generic
// trunc/extend multiclasses with HasFP16 as the predicate; the f64 <-> f16
// pair uses the dedicated avx512_cvtpd2ph / avx512_cvtph2pd multiclasses.
// f32 -> f16.
8297defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8298                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8299                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
// f16 -> f32.
8300defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8301                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8302                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
// f64 -> f16.
8303defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8304                                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
// f16 -> f64.
8305defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8306                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8307
// Hand-written selection patterns for the VCVTPD2PHZ256* and VCVTPD2PHZ128*
// records. For each source kind (register, full load, 64-bit broadcast load)
// there is an unmasked pattern on X86any_vfpround plus merge-masked (k) and
// zero-masked (kz) patterns on X86vmfpround.
8308let Predicates = [HasFP16, HasVLX] in {
8309  // Special patterns to allow use of X86vmfpround for masking. Instruction
8310  // patterns have been disabled with null_frag.
  // 256-bit source (v4f64), VK4WM mask: register forms.
8311  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8312            (VCVTPD2PHZ256rr VR256X:$src)>;
8313  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8314                          VK4WM:$mask)),
8315            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8316  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8317                          VK4WM:$mask),
8318            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8319
  // 256-bit source: full-width load forms.
8320  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8321            (VCVTPD2PHZ256rm addr:$src)>;
8322  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8323                          VK4WM:$mask),
8324            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8325  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8326                          VK4WM:$mask),
8327            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8328
  // 256-bit source: broadcast load forms.
8329  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8330            (VCVTPD2PHZ256rmb addr:$src)>;
8331  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8332                          (v8f16 VR128X:$src0), VK4WM:$mask),
8333            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8334  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8335                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8336            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8337
  // 128-bit source (v2f64), VK2WM mask: register forms.
8338  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8339            (VCVTPD2PHZ128rr VR128X:$src)>;
8340  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8341                          VK2WM:$mask),
8342            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8343  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8344                          VK2WM:$mask),
8345            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8346
  // 128-bit source: full-width load forms.
8347  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8348            (VCVTPD2PHZ128rm addr:$src)>;
8349  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8350                          VK2WM:$mask),
8351            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8352  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8353                          VK2WM:$mask),
8354            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8355
  // 128-bit source: broadcast load forms.
8356  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8357            (VCVTPD2PHZ128rmb addr:$src)>;
8358  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8359                          (v8f16 VR128X:$src0), VK2WM:$mask),
8360            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8361  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8362                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8363            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8364}
8365
8366// Convert Signed/Unsigned Doubleword to Double
// The enclosing let sets Uses to an empty register list and
// mayRaiseFPException to 0 for this multiclass, whereas the shared
// avx512_vcvt_fp helper sets Uses = [MXCSR] and mayRaiseFPException = 1.
// NOTE(review): presumably the outer let is meant to take precedence for the
// records defined here -- confirm against TableGen's let-override rules.
//   Z    - v8i32x_info -> v8f64_info, guarded by HasAVX512.
//   Z128 - v4i32x_info -> v2f64x_info, using OpNode128 / MaskOpNode128,
//          "{1to2}" broadcast, i64mem, VK2WM, and custom load DAGs that match
//          a 64-bit load (loadi64) placed into a vector with scalar_to_vector
//          and bitcast with bc_v4i32.
//   Z256 - v4i32x_info -> v4f64x_info.
// Z128 and Z256 are guarded by HasVLX alone.
8367let Uses = []<Register>, mayRaiseFPException = 0 in
8368multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8369                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8370                           SDNode MaskOpNode128,
8371                           X86SchedWriteWidths sched> {
8372  // No rounding in this op
8373  let Predicates = [HasAVX512] in
8374    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8375                            MaskOpNode, sched.ZMM>, EVEX_V512;
8376
8377  let Predicates = [HasVLX] in {
8378    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8379                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8380                               "", i64mem, VK2WM,
8381                               (v2f64 (OpNode128 (bc_v4i32
8382                                (v2i64
8383                                 (scalar_to_vector (loadi64 addr:$src)))))),
8384                               (v2f64 (MaskOpNode128 (bc_v4i32
8385                                (v2i64
8386                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8387                               EVEX_V128;
8388    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8389                               MaskOpNode, sched.YMM>, EVEX_V256;
8390  }
8391}
8392
8393// Convert Signed/Unsigned Doubleword to Float
// Source and destination have the same element count at every length:
//   Z    - v16i32_info -> v16f32_info, plus the rounding-control form
//          (OpNodeRnd); guarded by HasAVX512.
//   Z128 - v4i32x_info -> v4f32x_info; guarded by HasVLX.
//   Z256 - v8i32x_info -> v8f32x_info; guarded by HasVLX.
8394multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8395                           SDNode MaskOpNode, SDNode OpNodeRnd,
8396                           X86SchedWriteWidths sched> {
8397  let Predicates = [HasAVX512] in
8398    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8399                            MaskOpNode, sched.ZMM>,
8400             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8401                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8402
8403  let Predicates = [HasVLX] in {
8404    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8405                               MaskOpNode, sched.XMM>, EVEX_V128;
8406    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8407                               MaskOpNode, sched.YMM>, EVEX_V256;
8408  }
8409}
8410
8411// Convert Float to Signed/Unsigned Doubleword with truncation
//   Z    - v16f32_info -> v16i32_info, plus the SAE form (OpNodeSAE);
//          guarded by HasAVX512.
//   Z128 - v4f32x_info -> v4i32x_info; guarded by HasVLX.
//   Z256 - v8f32x_info -> v8i32x_info; guarded by HasVLX.
// There is no rounding-control form in this multiclass.
8412multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8413                            SDNode MaskOpNode,
8414                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8415  let Predicates = [HasAVX512] in {
8416    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8417                            MaskOpNode, sched.ZMM>,
8418             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8419                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8420  }
8421  let Predicates = [HasVLX] in {
8422    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8423                               MaskOpNode, sched.XMM>, EVEX_V128;
8424    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8425                               MaskOpNode, sched.YMM>, EVEX_V256;
8426  }
8427}
8428
8429// Convert Float to Signed/Unsigned Doubleword
// Same layout as avx512_cvttps2dq, but the 512-bit definition is paired with
// the rounding-control form (OpNodeRnd) instead of an SAE form, and OpNode is
// declared as SDNode rather than SDPatternOperator.
//   Z    - v16f32_info -> v16i32_info; guarded by HasAVX512.
//   Z128 - v4f32x_info -> v4i32x_info; guarded by HasVLX.
//   Z256 - v8f32x_info -> v8i32x_info; guarded by HasVLX.
8430multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8431                           SDNode MaskOpNode, SDNode OpNodeRnd,
8432                           X86SchedWriteWidths sched> {
8433  let Predicates = [HasAVX512] in {
8434    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8435                            MaskOpNode, sched.ZMM>,
8436             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8437                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8438  }
8439  let Predicates = [HasVLX] in {
8440    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8441                               MaskOpNode, sched.XMM>, EVEX_V128;
8442    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8443                               MaskOpNode, sched.YMM>, EVEX_V256;
8444  }
8445}
8446
8447// Convert Double to Signed/Unsigned Doubleword with truncation
// Expands to Z (v8f64 -> v8i32, EVEX_V512) under HasAVX512, Z128 (v2f64 source)
// and Z256 (v4f64 source) under HasVLX, plus "x"/"y"-suffixed assembler aliases.
//   opc, OpcodeStr - forwarded to every instantiation; OpcodeStr is also the
//                    stem of the alias mnemonics built below.
//   OpNode         - node for Z and Z256 (Z128 is given null_frag instead).
//   MaskOpNode     - masked counterpart for Z and Z256.
//   OpNodeSAE      - node passed to avx512_vcvt_fp_sae; used by Z only.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
8448multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8449                            SDNode MaskOpNode, SDNode OpNodeSAE,
8450                            X86SchedWriteWidths sched> {
  // 512-bit form: also gets the avx512_vcvt_fp_sae variant.
8451  let Predicates = [HasAVX512] in {
8452    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8453                            MaskOpNode, sched.ZMM>,
8454             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8455                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8456  }
8457  let Predicates = [HasVLX] in {
8458    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8459    // memory forms of these instructions in Asm Parser. They have the same
8460    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8461    // due to the same reason.
    // Z128 passes null_frag for both nodes, so its patterns are not generated
    // here; the [HasVLX] "Special patterns" block later in this file supplies
    // them for the instantiations of this multiclass.
8462    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8463                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8464                               VK2WM>, EVEX_V128;
8465    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8466                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8467  }
8468
  // "x"-suffixed aliases for the Z128 register (rr/rrk/rrkz) and broadcast
  // (rmb/rmbk/rmbkz) forms.
  // NOTE(review): the trailing `0, "att"` are presumably InstAlias's Emit flag
  // and asm variant name - confirm against the InstAlias class in Target.td.
8469  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8470                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8471                  VR128X:$src), 0, "att">;
8472  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8473                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8474                  VK2WM:$mask, VR128X:$src), 0, "att">;
8475  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8476                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8477                  VK2WM:$mask, VR128X:$src), 0, "att">;
8478  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8479                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8480                  f64mem:$src), 0, "att">;
8481  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8482                  "$dst {${mask}}, ${src}{1to2}}",
8483                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8484                  VK2WM:$mask, f64mem:$src), 0, "att">;
8485  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8486                  "$dst {${mask}} {z}, ${src}{1to2}}",
8487                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8488                  VK2WM:$mask, f64mem:$src), 0, "att">;
8489
  // "y"-suffixed aliases for the matching Z256 forms (VR256X source, VK4WM
  // mask, {1to4} broadcast).
8490  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8491                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8492                  VR256X:$src), 0, "att">;
8493  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8494                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8495                  VK4WM:$mask, VR256X:$src), 0, "att">;
8496  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8497                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8498                  VK4WM:$mask, VR256X:$src), 0, "att">;
8499  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8500                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8501                  f64mem:$src), 0, "att">;
8502  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8503                  "$dst {${mask}}, ${src}{1to4}}",
8504                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8505                  VK4WM:$mask, f64mem:$src), 0, "att">;
8506  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8507                  "$dst {${mask}} {z}, ${src}{1to4}}",
8508                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8509                  VK4WM:$mask, f64mem:$src), 0, "att">;
8510}
8511
8512// Convert Double to Signed/Unsigned Doubleword
// Same layout as the truncating variant above, except that the 512-bit form
// pairs avx512_vcvt_fp with avx512_vcvt_fp_rc (driven by OpNodeRnd) rather
// than with an SAE variant.
//   opc, OpcodeStr - forwarded to every instantiation; OpcodeStr is also the
//                    stem of the alias mnemonics built below.
//   OpNode         - node for Z and Z256 (Z128 is given null_frag instead).
//   MaskOpNode     - masked counterpart for Z and Z256.
//   OpNodeRnd      - node passed to avx512_vcvt_fp_rc; used by Z only.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
8513multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8514                           SDNode MaskOpNode, SDNode OpNodeRnd,
8515                           X86SchedWriteWidths sched> {
8516  let Predicates = [HasAVX512] in {
8517    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8518                            MaskOpNode, sched.ZMM>,
8519             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8520                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8521  }
8522  let Predicates = [HasVLX] in {
8523    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8524    // memory forms of these instructions in Asm Parser. They have the same
8525    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8526    // due to the same reason.
    // Z128 passes null_frag for both nodes, so its patterns are not generated
    // here; the [HasVLX] "Special patterns" block later in this file supplies
    // them for the instantiations of this multiclass.
8527    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8528                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8529                               VK2WM>, EVEX_V128;
8530    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8531                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8532  }
8533
  // "x"-suffixed aliases for the Z128 register (rr/rrk/rrkz) and broadcast
  // (rmb/rmbk/rmbkz) forms.
  // NOTE(review): the trailing `0, "att"` are presumably InstAlias's Emit flag
  // and asm variant name - confirm against the InstAlias class in Target.td.
8534  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8535                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8536  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8537                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8538                  VK2WM:$mask, VR128X:$src), 0, "att">;
8539  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8540                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8541                  VK2WM:$mask, VR128X:$src), 0, "att">;
8542  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8543                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8544                  f64mem:$src), 0, "att">;
8545  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8546                  "$dst {${mask}}, ${src}{1to2}}",
8547                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8548                  VK2WM:$mask, f64mem:$src), 0, "att">;
8549  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8550                  "$dst {${mask}} {z}, ${src}{1to2}}",
8551                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8552                  VK2WM:$mask, f64mem:$src), 0, "att">;
8553
  // "y"-suffixed aliases for the matching Z256 forms (VR256X source, VK4WM
  // mask, {1to4} broadcast).
8554  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8555                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8556  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8557                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8558                  VK4WM:$mask, VR256X:$src), 0, "att">;
8559  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8560                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8561                  VK4WM:$mask, VR256X:$src), 0, "att">;
8562  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8563                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8564                  f64mem:$src), 0, "att">;
8565  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8566                  "$dst {${mask}}, ${src}{1to4}}",
8567                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8568                  VK4WM:$mask, f64mem:$src), 0, "att">;
8569  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8570                  "$dst {${mask}} {z}, ${src}{1to4}}",
8571                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8572                  VK4WM:$mask, f64mem:$src), 0, "att">;
8573}
8574
8575// Convert Double to Signed/Unsigned Quadword
// Source and destination have the same element count at every width
// (v8f64 -> v8i64, v2f64 -> v2i64, v4f64 -> v4i64), so no "x"/"y" aliases or
// explicit broadcast strings are supplied here.
//   opc, OpcodeStr - forwarded unchanged to every instantiation.
//   OpNode         - node passed to avx512_vcvt_fp for all three widths.
//   MaskOpNode     - node passed alongside OpNode to avx512_vcvt_fp.
//   OpNodeRnd      - node passed to avx512_vcvt_fp_rc; used by Z only.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
8576multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8577                           SDNode MaskOpNode, SDNode OpNodeRnd,
8578                           X86SchedWriteWidths sched> {
  // 512-bit form requires HasDQI only.
8579  let Predicates = [HasDQI] in {
8580    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8581                            MaskOpNode, sched.ZMM>,
8582             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8583                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8584  }
  // 128/256-bit forms additionally require HasVLX.
8585  let Predicates = [HasDQI, HasVLX] in {
8586    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8587                               MaskOpNode, sched.XMM>, EVEX_V128;
8588    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8589                               MaskOpNode, sched.YMM>, EVEX_V256;
8590  }
8591}
8592
8593// Convert Double to Signed/Unsigned Quadword with truncation
// Same widths and predicates as the non-truncating Double -> Quadword
// multiclass, but the 512-bit form pairs with avx512_vcvt_fp_sae.
//   opc, OpcodeStr - forwarded unchanged to every instantiation.
//   OpNode         - node passed to avx512_vcvt_fp for all three widths.
//   MaskOpNode     - node passed alongside OpNode to avx512_vcvt_fp.
//   OpNodeRnd      - despite the name, this is forwarded to avx512_vcvt_fp_sae;
//                    the instantiations in this file pass X86cvttp2siSAE /
//                    X86cvttp2uiSAE for it.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
8594multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8595                            SDNode MaskOpNode, SDNode OpNodeRnd,
8596                            X86SchedWriteWidths sched> {
  // 512-bit form requires HasDQI only.
8597  let Predicates = [HasDQI] in {
8598    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8599                            MaskOpNode, sched.ZMM>,
8600             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8601                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8602  }
  // 128/256-bit forms additionally require HasVLX.
8603  let Predicates = [HasDQI, HasVLX] in {
8604    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8605                               MaskOpNode, sched.XMM>, EVEX_V128;
8606    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8607                               MaskOpNode, sched.YMM>, EVEX_V256;
8608  }
8609}
8610
8611// Convert Signed/Unsigned Quadword to Double
// Integer -> FP direction: the destination info is the f64 vector and the
// source info is the i64 vector at every width.
//   opc, OpcodeStr - forwarded unchanged to every instantiation.
//   OpNode         - node passed to avx512_vcvt_fp for all three widths.
//   MaskOpNode     - node passed alongside OpNode to avx512_vcvt_fp.
//   OpNodeRnd      - node passed to avx512_vcvt_fp_rc; used by Z only.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
8612multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8613                           SDNode MaskOpNode, SDNode OpNodeRnd,
8614                           X86SchedWriteWidths sched> {
  // 512-bit form requires HasDQI only.
8615  let Predicates = [HasDQI] in {
8616    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8617                            MaskOpNode, sched.ZMM>,
8618             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8619                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8620  }
  // 128/256-bit forms additionally require HasVLX and are tagged
  // NotEVEX2VEXConvertible.
8621  let Predicates = [HasDQI, HasVLX] in {
8622    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8623                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8624    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8625                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8626  }
8627}
8628
8629// Convert Float to Signed/Unsigned Quadword
// The source vector has half the bit width of the destination: v8f32 -> v8i64
// (Z), v4f32 -> v4i64 (Z256), and for Z128 only two elements of a v4f32
// source feed the v2i64 result.
//   opc, OpcodeStr - forwarded unchanged to every instantiation.
//   OpNode         - node passed to avx512_vcvt_fp; also wrapped around the
//                    explicit 64-bit load dag for Z128.
//   MaskOpNode     - masked counterpart, used the same way.
//   OpNodeRnd      - node passed to avx512_vcvt_fp_rc; used by Z only.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
8630multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8631                           SDNode MaskOpNode, SDNode OpNodeRnd,
8632                           X86SchedWriteWidths sched> {
8633  let Predicates = [HasDQI] in {
8634    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8635                            MaskOpNode, sched.ZMM>,
8636             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8637                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8638  }
8639  let Predicates = [HasDQI, HasVLX] in {
8640    // Explicitly specified broadcast string, since we take only 2 elements
8641    // from v4f32x_info source
    // The two trailing dags wrap OpNode / MaskOpNode around a 64-bit load
    // (loadf64 -> scalar_to_vector as v2f64 -> bc_v4f32), with f64mem as the
    // memory operand.
    // NOTE(review): presumably these override avx512_vcvt_fp's default
    // memory-form load dags - confirm against that multiclass.
8642    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8643                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8644                               (v2i64 (OpNode (bc_v4f32
8645                                (v2f64
8646                                 (scalar_to_vector (loadf64 addr:$src)))))),
8647                               (v2i64 (MaskOpNode (bc_v4f32
8648                                (v2f64
8649                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8650                               EVEX_V128;
8651    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8652                               MaskOpNode, sched.YMM>, EVEX_V256;
8653  }
8654}
8655
8656// Convert Float to Signed/Unsigned Quadword with truncation
// Same widths and predicates as the non-truncating Float -> Quadword
// multiclass, but the 512-bit form pairs with avx512_vcvt_fp_sae.
//   opc, OpcodeStr - forwarded unchanged to every instantiation.
//   OpNode         - node passed to avx512_vcvt_fp; also wrapped around the
//                    explicit 64-bit load dag for Z128.
//   MaskOpNode     - masked counterpart, used the same way.
//   OpNodeRnd      - despite the name, this is forwarded to avx512_vcvt_fp_sae;
//                    the instantiations in this file pass X86cvttp2siSAE /
//                    X86cvttp2uiSAE for it.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
8657multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8658                            SDNode MaskOpNode, SDNode OpNodeRnd,
8659                            X86SchedWriteWidths sched> {
8660  let Predicates = [HasDQI] in {
8661    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8662                            MaskOpNode, sched.ZMM>,
8663             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8664                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8665  }
8666  let Predicates = [HasDQI, HasVLX] in {
8667    // Explicitly specified broadcast string, since we take only 2 elements
8668    // from v4f32x_info source
    // The two trailing dags wrap OpNode / MaskOpNode around a 64-bit load
    // (loadf64 -> scalar_to_vector as v2f64 -> bc_v4f32), with f64mem as the
    // memory operand.
    // NOTE(review): presumably these override avx512_vcvt_fp's default
    // memory-form load dags - confirm against that multiclass.
8669    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8670                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8671                               (v2i64 (OpNode (bc_v4f32
8672                                (v2f64
8673                                 (scalar_to_vector (loadf64 addr:$src)))))),
8674                               (v2i64 (MaskOpNode (bc_v4f32
8675                                (v2f64
8676                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8677                               EVEX_V128;
8678    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8679                               MaskOpNode, sched.YMM>, EVEX_V256;
8680  }
8681}
8682
8683// Convert Signed/Unsigned Quadword to Float
8684// Also Convert Signed/Unsigned Doubleword to Half
// Generic over the source/destination type families: the destination of the
// 512-bit form is _dst.info256, and both the 128- and 256-bit forms write
// _dst.info128.
//   opc, OpcodeStr - forwarded to every instantiation; OpcodeStr is also the
//                    stem of the alias mnemonics built below.
//   OpNode         - node for Z and Z256 (Z128 is given null_frag instead).
//   MaskOpNode     - masked counterpart for Z and Z256.
//   OpNode128      - node matched by the explicit unmasked Z128 patterns.
//   OpNode128M     - node matched by the explicit masked Z128 patterns; it
//                    takes (source, passthru-or-zero, mask) operands.
//   OpNodeRnd      - node passed to avx512_vcvt_fp_rc; used by Z only.
//   _dst, _src     - AVX512VLVectorVTInfo families for result and source.
//   sched          - supplies sched.ZMM / sched.XMM / sched.YMM per width.
//   prd            - base predicate (default HasDQI); HasVLX is added for the
//                    128/256-bit forms and the explicit patterns.
8685multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8686                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8687                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8688                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8689                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8690  let Predicates = [prd] in {
8691    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8692                            MaskOpNode, sched.ZMM>,
8693             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8694                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8695  }
8696  let Predicates = [prd, HasVLX] in {
8697    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8698    // memory forms of these instructions in Asm Parser. They have the same
8699    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8700    // due to the same reason.
8701    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8702                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8703                               "{x}", i128mem, _src.info128.KRCWM>,
8704                               EVEX_V128, NotEVEX2VEXConvertible;
8705    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8706                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8707                               "{y}">, EVEX_V256,
8708                               NotEVEX2VEXConvertible;
8709
8710    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8711    // patterns have been disabled with null_frag.
    // Register-source forms: unmasked (rr), merge-masked with $src0 (rrk) and
    // zero-masked via ImmAllZerosV (rrkz).
8712    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8713              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8714    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8715                             _src.info128.KRCWM:$mask),
8716              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8717    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8718                             _src.info128.KRCWM:$mask),
8719              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8720
    // Full-vector load forms (rm/rmk/rmkz) using _src.info128.LdFrag.
8721    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8722              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8723    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8724                             _src.info128.KRCWM:$mask),
8725              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8726    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8727                             _src.info128.KRCWM:$mask),
8728              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8729
    // Broadcast-load forms (rmb/rmbk/rmbkz).
    // NOTE(review): X86VBroadcastld64 is hard-coded here, yet this multiclass
    // is also instantiated with avx512vl_i32_info as _src (VCVTDQ2PH /
    // VCVTUDQ2PH) - confirm whether a 32-bit broadcast fragment is intended
    // for those instantiations.
8730    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8731              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8732    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8733                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8734              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8735    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8736                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8737              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8738  }
8739
  // "x"-suffixed aliases for the Z128 register and broadcast forms.
  // NOTE(review): VK2WM, i64mem and {1to2} (and VK4WM / {1to4} in the "y"
  // group below) are hard-coded rather than derived from _src, although the
  // defms above use _src.info128.KRCWM / BroadcastStr - confirm these are
  // right for the avx512vl_i32_info instantiations.
  // NOTE(review): the trailing `0, "att"` are presumably InstAlias's Emit flag
  // and asm variant name - confirm against the InstAlias class in Target.td.
8740  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8741                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8742                  VR128X:$src), 0, "att">;
8743  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8744                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8745                  VK2WM:$mask, VR128X:$src), 0, "att">;
8746  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8747                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8748                  VK2WM:$mask, VR128X:$src), 0, "att">;
8749  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8750                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8751                  i64mem:$src), 0, "att">;
8752  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8753                  "$dst {${mask}}, ${src}{1to2}}",
8754                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8755                  VK2WM:$mask, i64mem:$src), 0, "att">;
8756  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8757                  "$dst {${mask}} {z}, ${src}{1to2}}",
8758                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8759                  VK2WM:$mask, i64mem:$src), 0, "att">;
8760
  // "y"-suffixed aliases for the matching Z256 forms.
8761  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8762                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8763                  VR256X:$src), 0, "att">;
8764  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8765                  "$dst {${mask}}, $src}",
8766                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8767                  VK4WM:$mask, VR256X:$src), 0, "att">;
8768  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8769                  "$dst {${mask}} {z}, $src}",
8770                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8771                  VK4WM:$mask, VR256X:$src), 0, "att">;
8772  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8773                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8774                  i64mem:$src), 0, "att">;
8775  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8776                  "$dst {${mask}}, ${src}{1to4}}",
8777                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8778                  VK4WM:$mask, i64mem:$src), 0, "att">;
8779  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8780                  "$dst {${mask}} {z}, ${src}{1to4}}",
8781                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8782                  VK4WM:$mask, i64mem:$src), 0, "att">;
8783}
8784
// Instantiations of the packed conversion multiclasses. Each defm supplies the
// opcode byte, the mnemonic, the selection nodes and the scheduling class,
// followed by the prefix / VEX_W / EVEX_CD8 modifier classes for the encoding.
8785defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8786                                 X86any_VSintToFP, X86VSintToFP,
8787                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8788
8789defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8790                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8791                                PS, EVEX_CD8<32, CD8VF>;
8792
8793defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8794                                 X86cvttp2si, X86cvttp2siSAE,
8795                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8796
8797defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8798                                 X86cvttp2si, X86cvttp2siSAE,
8799                                 SchedWriteCvtPD2DQ>,
8800                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8801
8802defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8803                                 X86cvttp2ui, X86cvttp2uiSAE,
8804                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8805
8806defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8807                                 X86cvttp2ui, X86cvttp2uiSAE,
8808                                 SchedWriteCvtPD2DQ>,
8809                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8810
8811defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8812                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8813                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8814
8815defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8816                                 uint_to_fp, X86VUintToFpRnd,
8817                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8818
// The non-truncating FP -> integer instantiations below pass the same node
// (X86cvtp2Int or X86cvtp2UInt) for both OpNode and MaskOpNode.
8819defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8820                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8821                                 EVEX_CD8<32, CD8VF>;
8822
8823defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8824                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8825                                 VEX_W, EVEX_CD8<64, CD8VF>;
8826
8827defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8828                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8829                                 PS, EVEX_CD8<32, CD8VF>;
8830
8831defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8832                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8833                                 PS, EVEX_CD8<64, CD8VF>;
8834
8835defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8836                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8837                                 PD, EVEX_CD8<64, CD8VF>;
8838
8839defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8840                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8841                                 EVEX_CD8<32, CD8VH>;
8842
8843defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8844                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8845                                 PD, EVEX_CD8<64, CD8VF>;
8846
8847defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8848                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8849                                 EVEX_CD8<32, CD8VH>;
8850
// Truncating FP -> quadword instantiations: the third node argument is the
// *SAE node, which the multiclasses receive in their OpNodeRnd parameter.
8851defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8852                                 X86cvttp2si, X86cvttp2siSAE,
8853                                 SchedWriteCvtPD2DQ>, VEX_W,
8854                                 PD, EVEX_CD8<64, CD8VF>;
8855
8856defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8857                                 X86cvttp2si, X86cvttp2siSAE,
8858                                 SchedWriteCvtPS2DQ>, PD,
8859                                 EVEX_CD8<32, CD8VH>;
8860
8861defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8862                                 X86cvttp2ui, X86cvttp2uiSAE,
8863                                 SchedWriteCvtPD2DQ>, VEX_W,
8864                                 PD, EVEX_CD8<64, CD8VF>;
8865
8866defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8867                                 X86cvttp2ui, X86cvttp2uiSAE,
8868                                 SchedWriteCvtPS2DQ>, PD,
8869                                 EVEX_CD8<32, CD8VH>;
8870
8871defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8872                            sint_to_fp, X86VSintToFpRnd,
8873                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8874
8875defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8876                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8877                            VEX_W, XS, EVEX_CD8<64, CD8VF>;
8878
// Doubleword -> half instantiations: these override the multiclass's default
// HasDQI predicate with HasFP16 and use the avx512vl_i32_info source family.
8879defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8880                            X86any_VSintToFP, X86VMSintToFP,
8881                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8882                            SchedWriteCvtDQ2PS, HasFP16>,
8883                            T_MAP5PS, EVEX_CD8<32, CD8VF>;
8884
8885defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8886                            X86any_VUintToFP, X86VMUintToFP,
8887                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8888                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8889                            EVEX_CD8<32, CD8VF>;
8890
// Quadword -> float instantiations: these keep the default HasDQI predicate
// and use the avx512vl_i64_info source family.
8891defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8892                            X86any_VSintToFP, X86VMSintToFP,
8893                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8894                            SchedWriteCvtDQ2PS>, VEX_W, PS,
8895                            EVEX_CD8<64, CD8VF>;
8896
8897defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8898                            X86any_VUintToFP, X86VMUintToFP,
8899                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8900                            SchedWriteCvtDQ2PS>, VEX_W, XD,
8901                            EVEX_CD8<64, CD8VF>;
8902
// Selection patterns for the 128-bit Double -> Doubleword instructions
// (VCVTPD2DQ, VCVTTPD2DQ, VCVTPD2UDQ, VCVTTPD2UDQ). Each of the four groups
// below covers a register source (rr), a full v2f64 load (rm) and a 64-bit
// broadcast load (rmb), each in unmasked, merge-masked (k, with $src0 as the
// pass-through operand) and zero-masked (kz, matching ImmAllZerosV) form.
// The result type is v4i32 while the mask class is VK2WM.
8903let Predicates = [HasVLX] in {
8904  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8905  // patterns have been disabled with null_frag.
8906  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8907            (VCVTPD2DQZ128rr VR128X:$src)>;
8908  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8909                          VK2WM:$mask),
8910            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8911  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8912                          VK2WM:$mask),
8913            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8914
8915  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8916            (VCVTPD2DQZ128rm addr:$src)>;
8917  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8918                          VK2WM:$mask),
8919            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8920  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8921                          VK2WM:$mask),
8922            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8923
8924  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8925            (VCVTPD2DQZ128rmb addr:$src)>;
8926  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8927                          (v4i32 VR128X:$src0), VK2WM:$mask),
8928            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8929  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8930                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8931            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8932
8933  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8934  // patterns have been disabled with null_frag.
8935  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8936            (VCVTTPD2DQZ128rr VR128X:$src)>;
8937  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8938                          VK2WM:$mask),
8939            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8940  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8941                          VK2WM:$mask),
8942            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8943
8944  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8945            (VCVTTPD2DQZ128rm addr:$src)>;
8946  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8947                          VK2WM:$mask),
8948            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8949  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8950                          VK2WM:$mask),
8951            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8952
8953  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8954            (VCVTTPD2DQZ128rmb addr:$src)>;
8955  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8956                          (v4i32 VR128X:$src0), VK2WM:$mask),
8957            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8958  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8959                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8960            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8961
8962  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8963  // patterns have been disabled with null_frag.
8964  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8965            (VCVTPD2UDQZ128rr VR128X:$src)>;
8966  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8967                           VK2WM:$mask),
8968            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8969  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8970                           VK2WM:$mask),
8971            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8972
8973  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8974            (VCVTPD2UDQZ128rm addr:$src)>;
8975  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8976                           VK2WM:$mask),
8977            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8978  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8979                           VK2WM:$mask),
8980            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8981
8982  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8983            (VCVTPD2UDQZ128rmb addr:$src)>;
8984  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8985                           (v4i32 VR128X:$src0), VK2WM:$mask),
8986            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8987  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8988                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8989            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8990
8991  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8992  // patterns have been disabled with null_frag.
8993  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8994            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8995  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8996                          VK2WM:$mask),
8997            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8998  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8999                          VK2WM:$mask),
9000            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9001
9002  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
9003            (VCVTTPD2UDQZ128rm addr:$src)>;
9004  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9005                          VK2WM:$mask),
9006            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9007  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9008                          VK2WM:$mask),
9009            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9010
9011  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
9012            (VCVTTPD2UDQZ128rmb addr:$src)>;
9013  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9014                          (v4i32 VR128X:$src0), VK2WM:$mask),
9015            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9016  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9017                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9018            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9019}
9020
9021  let Predicates = [HasDQI, HasVLX] in {
        // Select the 128-bit memory forms of the v4f32 -> v2i64 conversions when
        // the source is a 64-bit X86vzload64 bitcast to v4f32. Each group of
        // three covers the unmasked (rm), merge-masked (rmk, passthru $src0) and
        // zero-masked (rmkz) instruction.

        // X86cvtp2Int -> VCVTPS2QQZ128.
9022    def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9023              (VCVTPS2QQZ128rm addr:$src)>;
9024    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9025                                   (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9026                                   VR128X:$src0)),
9027              (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9028    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9029                                   (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9030                                   v2i64x_info.ImmAllZerosV)),
9031              (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9032
        // X86cvtp2UInt -> VCVTPS2UQQZ128.
9033    def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9034              (VCVTPS2UQQZ128rm addr:$src)>;
9035    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9036                                   (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9037                                   VR128X:$src0)),
9038              (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9039    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9040                                   (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9041                                   v2i64x_info.ImmAllZerosV)),
9042              (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9043
        // Truncating signed conversion -> VCVTTPS2QQZ128. The unmasked pattern
        // matches X86any_cvttp2si; the masked ones match only X86cvttp2si.
9044    def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9045              (VCVTTPS2QQZ128rm addr:$src)>;
9046    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9047                                   (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9048                                   VR128X:$src0)),
9049              (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9050    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9051                                   (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9052                                   v2i64x_info.ImmAllZerosV)),
9053              (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9054
        // Truncating unsigned conversion -> VCVTTPS2UQQZ128. Same any/non-any
        // split between the unmasked and masked patterns as above.
9055    def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9056              (VCVTTPS2UQQZ128rm addr:$src)>;
9057    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9058                                   (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9059                                   VR128X:$src0)),
9060              (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9061    def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9062                                   (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9063                                   v2i64x_info.ImmAllZerosV)),
9064              (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9065  }
9066
9067  let Predicates = [HasVLX] in {
        // Select the 128-bit memory forms of the v4i32 -> v2f64 conversions when
        // the source is a 64-bit X86vzload64 bitcast to v4i32. Each group covers
        // the unmasked (rm), merge-masked (rmk) and zero-masked (rmkz) form; the
        // unmasked pattern uses the X86any_* node, the masked ones the plain node.

        // Signed: VCVTDQ2PDZ128.
9068    def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9069              (VCVTDQ2PDZ128rm addr:$src)>;
9070    def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9071                                   (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9072                                   VR128X:$src0)),
9073              (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9074    def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9075                                   (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9076                                   v2f64x_info.ImmAllZerosV)),
9077              (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9078
        // Unsigned: VCVTUDQ2PDZ128.
9079    def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9080              (VCVTUDQ2PDZ128rm addr:$src)>;
9081    def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9082                                   (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9083                                   VR128X:$src0)),
9084              (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9085    def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9086                                   (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9087                                   v2f64x_info.ImmAllZerosV)),
9088              (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9089  }
9090
9091//===----------------------------------------------------------------------===//
9092// Half precision conversion instructions
9093//===----------------------------------------------------------------------===//
9094
9095  let Uses = [MXCSR], mayRaiseFPException = 1 in
      // avx512_cvtph2ps - register (rr) and memory (rm) forms of vcvtph2ps
      // (opcode 0x13), converting a _src vector into a _dest vector.
      //   x86memop - memory operand class of the rm form.
      //   ld_dag   - DAG used as the loaded source value in the rm patterns.
      //   sched    - scheduling info; rm uses sched.Folded.
      // Both forms pass two patterns to AVX512_maskable_split: the first built
      // on X86any_cvtph2ps, the second on X86cvtph2ps.
9096  multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9097                             X86MemOperand x86memop, dag ld_dag,
9098                             X86FoldableSchedWrite sched> {
9099    defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
9100                              (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9101                              (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9102                              (X86cvtph2ps (_src.VT _src.RC:$src))>,
9103                              T8PD, Sched<[sched]>;
9104    defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9105                              (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9106                              (X86any_cvtph2ps (_src.VT ld_dag)),
9107                              (X86cvtph2ps (_src.VT ld_dag))>,
9108                              T8PD, Sched<[sched.Folded]>;
9109  }
9110
9111  multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9112                                 X86FoldableSchedWrite sched> {
        // rrb - register-only vcvtph2ps (opcode 0x13) printed with "{sae}" and
        // tagged EVEX_B; its pattern uses X86cvtph2psSAE. Only Uses = [MXCSR] is
        // set here (mayRaiseFPException is not).
9113    let Uses = [MXCSR] in
9114    defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9115                               (ins _src.RC:$src), "vcvtph2ps",
9116                               "{sae}, $src", "$src, {sae}",
9117                               (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9118                               T8PD, EVEX_B, Sched<[sched]>;
9119  }
9120
9121  let Predicates = [HasAVX512] in
        // 512-bit vcvtph2ps: v16i16 source -> v16f32 result, loading through
        // f256mem with a plain (load addr:$src). Combines the rr/rm forms with the
        // {sae} rrb form from avx512_cvtph2ps_sae.
9122    defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9123                                      (load addr:$src), WriteCvtPH2PSZ>,
9124                      avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9125                      EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9126
9127  let Predicates = [HasVLX] in {
        // 256-bit form: v8i16 source loaded through f128mem -> v8f32.
9128    defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9129                         (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9130                         EVEX_CD8<32, CD8VH>;
        // 128-bit form: the memory operand is f64mem, and the rm patterns match a
        // 64-bit X86vzload64 bitconverted to the v8i16 source type -> v4f32.
9131    defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9132                         (bitconvert (v2i64 (X86vzload64 addr:$src))),
9133                         WriteCvtPH2PS>, EVEX, EVEX_V128,
9134                         EVEX_CD8<32, CD8VH>;
9135
9136    // Pattern match vcvtph2ps of a scalar i64 load.
        // (The i64 load reaches the node as scalar_to_vector, bitconverted to
        // v8i16, and also selects the unmasked rm form.)
9137    def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9138                (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9139              (VCVTPH2PSZ128rm addr:$src)>;
9140  }
9141
9142  multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9143                             X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
        // avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) converting _src to _dest, with
        // an 8-bit immediate ($src2) passed through to the node as a timm.
        //   x86memop - destination memory operand class of the store forms.
        //   RR / MR  - scheduling classes for the register and store forms.
        // Defines rr, rrk (merge-masked), rrkz (zero-masked), mr and mrk.
9144  let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
        // Unmasked register form; matches X86any_cvtps2ph.
9145    def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9146               (ins _src.RC:$src1, i32u8imm:$src2),
9147               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9148               [(set _dest.RC:$dst,
9149                     (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9150               Sched<[RR]>;
        // Merge-masked register form: $src0 is tied to $dst and supplies the
        // passthru value of X86mcvtps2ph. The mask class comes from _src.
9151    let Constraints = "$src0 = $dst" in
9152    def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9153               (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9154               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9155               [(set _dest.RC:$dst,
9156                     (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9157                                   _dest.RC:$src0, _src.KRCWM:$mask))]>,
9158               Sched<[RR]>, EVEX_K;
        // Zero-masked register form: passthru is _dest.ImmAllZerosV.
9159    def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9160               (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9161               "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9162               [(set _dest.RC:$dst,
9163                     (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9164                                   _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9165               Sched<[RR]>, EVEX_KZ;
        // Store forms carry no patterns of their own (empty pattern lists), so
        // mayStore is set explicitly.
9166    let hasSideEffects = 0, mayStore = 1 in {
9167      def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9168                 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9169                 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9170                 Sched<[MR]>;
          // NOTE(review): the mask operand here is _dest.KRCWM, whereas rrk/rrkz
          // use _src.KRCWM; these differ when _dest and _src have different
          // NumElts. Confirm this is intentional.
9171      def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9172                 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9173                 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9174                  EVEX_K, Sched<[MR]>, NotMemoryFoldable;
9175    }
9176  }
9177  }
9178
9179  multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9180                                 SchedWrite Sched> {
        // rrb - register form of vcvtps2ph (opcode 0x1D) printed with "{sae}" and
        // tagged EVEX_B. It is built with AVX512_maskable_in_asm and an empty
        // pattern list, so hasSideEffects = 0 is stated explicitly.
9181    let hasSideEffects = 0, Uses = [MXCSR] in
9182    defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
9183                     (outs _dest.RC:$dst),
9184                     (ins _src.RC:$src1, i32u8imm:$src2),
9185                     "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
9186                     EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
9187  }
9188
9189  let Predicates = [HasAVX512] in {
        // 512-bit vcvtps2ph: v16f32 source -> v16i16 result, storing through
        // f256mem. Combines the rr/rrk/rrkz/mr/mrk forms with the {sae} rrb form.
9190    defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9191                                      WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9192                      avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9193                                          EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9194
        // A store of the whole v16i16 conversion result selects the mr form
        // (which is defined with an empty pattern list in the multiclass).
9195    def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9196              (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9197  }
9198
9199  let Predicates = [HasVLX] in {
        // 256-bit source (v8f32) -> v8i16, stored through f128mem.
9200    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9201                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9202                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
        // 128-bit source (v4f32) -> v8i16 register type, stored through f64mem.
9203    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9204                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
9205                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9206
        // Store patterns for the mr forms. For the 128-bit instruction the stored
        // value is element 0 of the result viewed as v2f64 or v2i64 (i.e. the low
        // 64 bits), matching the f64mem destination.
9207    def : Pat<(store (f64 (extractelt
9208                           (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9209                           (iPTR 0))), addr:$dst),
9210              (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9211    def : Pat<(store (i64 (extractelt
9212                           (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9213                           (iPTR 0))), addr:$dst),
9214              (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
        // For the 256-bit instruction the whole v8i16 result is stored.
9215    def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9216              (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9217  }
9218
9219  //  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
      //  Defines a single register-register record, rrb, with no outputs and an
      //  empty pattern list. OpcodeStr is the full mnemonic; "{sae}" is appended
      //  in the asm string. d is the execution domain; sched defaults to
      //  WriteFComX. EFLAGS is not listed here; the instantiations wrap the defm
      //  in Defs = [EFLAGS].
9220  multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9221                                string OpcodeStr, Domain d,
9222                                X86FoldableSchedWrite sched = WriteFComX> {
9223    let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9224    def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9225                    !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9226                    EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9227  }
9228
9229  let Defs = [EFLAGS], Predicates = [HasAVX512] in {
        // {sae} register forms (rrb) of the scalar compares: opcode 0x2E for the
        // unordered vucomis*, 0x2F for the ordered vcomis*; the sd variants add
        // VEX_W and use a 64-bit CD8 element size.
9230    defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9231                                     AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9232    defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9233                                     AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9234    defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9235                                     AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9236    defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9237                                     AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9238  }
9239
9240  let Defs = [EFLAGS], Predicates = [HasAVX512] in {
        // EVEX-encoded scalar compares on FR32X/FR64X, built from the shared
        // sse12_ord_cmp multiclass. The ucomis* records (0x2E) use X86any_fcmp;
        // the comis* records (0x2F) use X86strict_fcmps.
9241    defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9242                                   "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9243                                   EVEX_CD8<32, CD8VT1>;
9244    defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9245                                    "ucomisd", SSEPackedDouble>, PD, EVEX,
9246                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9247    defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9248                                   "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9249                                   EVEX_CD8<32, CD8VT1>;
9250    defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9251                                   "comisd", SSEPackedDouble>, PD, EVEX,
9252                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
        // Code-generation-only variants operating on VR128X vector values: they
        // match the X86ucomi / X86comi nodes and use the sse_load_* scalar memory
        // fragments.
9253    let isCodeGenOnly = 1 in {
9254      defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9255                            sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9256                            EVEX_CD8<32, CD8VT1>;
9257      defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9258                            sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9259                            VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9260
9261      defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9262                            sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9263                            EVEX_CD8<32, CD8VT1>;
9264      defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9265                            sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9266                            VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9267    }
9268  }
9269
9270  let Defs = [EFLAGS], Predicates = [HasFP16] in {
        // Half-precision scalar compares (vucomish 0x2E, vcomish 0x2F), all in
        // T_MAP5PS with a 16-bit CD8 element size. Same three layers as the
        // ss/sd versions: {sae} rrb forms, FR16X forms, and isCodeGenOnly
        // VR128X forms.
9271    defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9272                                  SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9273                                  EVEX_CD8<16, CD8VT1>;
9274    defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9275                                  SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9276                                  EVEX_CD8<16, CD8VT1>;
9277    defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9278                                  "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9279                                  VEX_LIG, EVEX_CD8<16, CD8VT1>;
9280    defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9281                                  "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9282                                  VEX_LIG, EVEX_CD8<16, CD8VT1>;
9283    let isCodeGenOnly = 1 in {
9284      defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9285                                  sse_load_f16, "ucomish", SSEPackedSingle>,
9286                                  T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9287
9288      defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9289                                  sse_load_f16, "comish", SSEPackedSingle>,
9290                                  T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9291    }
9292  }
9293
9294  /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
      /// Two-source scalar forms: rr takes both operands in registers, rm takes
      /// $src2 from memory through _.IntScalarMemOp / _.ScalarIntMemFrags.
      /// OpNode is applied to ($src1, $src2). prd is the subtarget predicate
      /// guarding both records and defaults to HasAVX512.
9295  multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9296                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
9297                           Predicate prd = HasAVX512> {
9298    let Predicates = [prd], ExeDomain = _.ExeDomain in {
9299    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9300                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9301                             "$src2, $src1", "$src1, $src2",
9302                             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9303                             EVEX_4V, VEX_LIG, Sched<[sched]>;
9304    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9305                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9306                           "$src2, $src1", "$src1, $src2",
9307                           (OpNode (_.VT _.RC:$src1),
9308                            (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9309                            Sched<[sched.Folded, sched.ReadAfterFold]>;
9310  }
9311  }
9312
9313  defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9314                                 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9315                                 T_MAP6PD;
      // Half-precision scalar reciprocal (above, 0x4D) and reciprocal square root
      // (below, 0x4F). Both reuse the X86rcp14s / X86rsqrt14s nodes, are guarded
      // by HasFP16 and live in T_MAP6PD. Unlike the ss/sd defms that follow, they
      // are not wrapped in Uses = [MXCSR].
9316  defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9317                                   SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9318                                   EVEX_CD8<16, CD8VT1>, T_MAP6PD;
9319  let Uses = [MXCSR] in {
      // Single/double scalar rcp14 (0x4D) and rsqrt14 (0x4F) in T8PD, using the
      // default HasAVX512 predicate of avx512_fp14_s. The sd variants add VEX_W
      // and a 64-bit CD8 element size.
9320  defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9321                                 f32x_info>, EVEX_CD8<32, CD8VT1>,
9322                                 T8PD;
9323  defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9324                                 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
9325                                 T8PD;
9326  defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9327                                   SchedWriteFRsqrt.Scl, f32x_info>,
9328                                   EVEX_CD8<32, CD8VT1>, T8PD;
9329  defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9330                                   SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
9331                                   EVEX_CD8<64, CD8VT1>, T8PD;
9332  }
9333
9334  /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
      /// (also instantiated for the "ph" mnemonics by avx512_fp14_p_vl_all).
      /// One-source packed forms of OpNode: r (register), m (full-vector load
      /// via _.LdFrag) and mb (broadcast load via _.BroadcastLdFrag, EVEX_B, with
      /// _.BroadcastStr appended to the operand in the asm string).
9335  multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9336                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9337    let ExeDomain = _.ExeDomain in {
9338    defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9339                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
9340                           (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9341                           Sched<[sched]>;
9342    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9343                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9344                           (OpNode (_.VT
9345                             (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9346                           Sched<[sched.Folded, sched.ReadAfterFold]>;
9347    defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9348                            (ins _.ScalarMemOp:$src), OpcodeStr,
9349                            "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9350                            (OpNode (_.VT
9351                              (_.BroadcastLdFrag addr:$src)))>,
9352                            EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9353    }
9354  }
9355
9356  multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9357                                  X86SchedWriteWidths sched> {
        // Instantiates avx512_fp14_p for every vector width and element type.
        // OpcodeStr is the mnemonic stem: "14ps"/"14pd" or "ph" is appended.
        // The ps/pd groups are wrapped in Uses = [MXCSR]; the ph groups are not.

        // 512-bit ps/pd.
9358    let Uses = [MXCSR] in {
9359    defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9360                               v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9361    defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9362                               v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9363    }
        // 512-bit ph (HasFP16), encoded in T_MAP6PD.
9364    let Predicates = [HasFP16] in
9365    defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9366                             v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9367
9368    // Define only if AVX512VL feature is present.
9369    let Predicates = [HasVLX], Uses = [MXCSR] in {
9370      defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9371                                    OpNode, sched.XMM, v4f32x_info>,
9372                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
9373      defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9374                                    OpNode, sched.YMM, v8f32x_info>,
9375                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
9376      defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9377                                    OpNode, sched.XMM, v2f64x_info>,
9378                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
9379      defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9380                                    OpNode, sched.YMM, v4f64x_info>,
9381                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
9382    }
        // 128/256-bit ph forms need both HasFP16 and HasVLX.
9383    let Predicates = [HasFP16, HasVLX] in {
9384      defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9385                                  OpNode, sched.XMM, v8f16x_info>,
9386                                  EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9387      defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9388                                  OpNode, sched.YMM, v16f16x_info>,
9389                                  EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9390    }
9391  }
9392
9393  defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
      // Packed reciprocal square root (above, 0x4E) and reciprocal (below, 0x4C).
      // The defm name is only the stem: the multiclass appends 14PS*/14PD*/PH*
      // to form the record names and "14ps"/"14pd"/"ph" to form the mnemonics.
9394  defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9395
9396  /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
      /// (also instantiated for the scalar vgetexp records in this file).
      /// Two-source scalar forms: r (register), rb (register with "{sae}",
      /// EVEX_B, using OpNodeSAE) and m (memory $src2 through
      /// _.IntScalarMemOp / _.ScalarIntMemFrags). r and m carry SIMD_EXC; rb
      /// does not.
9397  multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9398                           SDNode OpNode, SDNode OpNodeSAE,
9399                           X86FoldableSchedWrite sched> {
9400    let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9401    defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9402                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9403                             "$src2, $src1", "$src1, $src2",
9404                             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9405                             Sched<[sched]>, SIMD_EXC;
9406
9407    defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9408                              (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9409                              "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9410                              (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9411                              EVEX_B, Sched<[sched]>;
9412
9413    defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9414                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9415                           "$src2, $src1", "$src1, $src2",
9416                           (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9417                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9418    }
9419  }
9420
9421  multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9422                          SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
        // Instantiates avx512_fp28_s for f32 (SSZ, "ss" suffix) and f64 (SDZ, "sd"
        // suffix, VEX_W). No predicate is set here; callers supply one if needed.
9423    defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9424                             sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9425    defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9426                             sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
9427  }
9428
9429  multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9430                          SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
        // Half-precision counterpart of avx512_eri_s: a single SHZ instantiation
        // of avx512_fp28_s ("sh" suffix) guarded by HasFP16, encoded in T_MAP6PD.
9431    let Predicates = [HasFP16] in
9432    defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9433                 EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9434  }
9435
9436  let Predicates = [HasERI] in {
        // Scalar rcp28 (0xCB) and rsqrt28 (0xCD), ss and sd, gated on HasERI.
9437    defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9438                                 SchedWriteFRcp.Scl>;
9439    defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9440                                 SchedWriteFRsqrt.Scl>;
9441  }
9442
9443  defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9444                                SchedWriteFRnd.Scl>,
      // Scalar vgetexp (0x43): ss/sd from avx512_eri_s plus sh from
      // avx512_vgetexpsh. This defm sits outside the HasERI block above.
9445                   avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9446                                    SchedWriteFRnd.Scl>;
9447  /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
      /// (also instantiated for vexp2 and the packed vgetexp records below).
      /// One-source packed forms of OpNode: r (register), m (full-vector load
      /// via _.LdFrag) and mb (broadcast load via _.BroadcastLdFrag, EVEX_B).
      /// All three are marked Uses = [MXCSR] and mayRaiseFPException = 1.
9448
9449  multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9450                           SDNode OpNode, X86FoldableSchedWrite sched> {
9451    let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9452    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9453                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
9454                           (OpNode (_.VT _.RC:$src))>,
9455                           Sched<[sched]>;
9456
9457    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9458                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9459                           (OpNode (_.VT
9460                               (bitconvert (_.LdFrag addr:$src))))>,
9461                            Sched<[sched.Folded, sched.ReadAfterFold]>;
9462
9463    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9464                           (ins _.ScalarMemOp:$src), OpcodeStr,
9465                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9466                           (OpNode (_.VT
9467                                    (_.BroadcastLdFrag addr:$src)))>,
9468                           EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9469    }
9470  }
9471  multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9472                           SDNode OpNode, X86FoldableSchedWrite sched> {
        // rb - register-only packed form printed with "{sae}" and tagged EVEX_B.
        // Callers pass the SAE flavour of the node as OpNode. Only Uses = [MXCSR]
        // is set (no mayRaiseFPException).
9473    let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9474    defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9475                          (ins _.RC:$src), OpcodeStr,
9476                          "{sae}, $src", "$src, {sae}",
9477                          (OpNode (_.VT _.RC:$src))>,
9478                          EVEX_B, Sched<[sched]>;
9479  }
9480
9481  multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9482                         SDNode OpNodeSAE, X86SchedWriteWidths sched> {
         // 512-bit packed ps (PSZ) and pd (PDZ, VEX_W) records: the r/m/mb forms
         // from avx512_fp28_p plus the {sae} rb form from avx512_fp28_p_sae, which
         // receives OpNodeSAE.
9483     defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9484                avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9485                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9486     defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9487                avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9488                T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9489  }
9490
9491  multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9492                                    SDNode OpNode, X86SchedWriteWidths sched> {
        // 128/256-bit ps and pd instantiations of avx512_fp28_p (r/m/mb only; no
        // {sae} form is added at these widths).
9493    // Define only if AVX512VL feature is present.
9494    let Predicates = [HasVLX] in {
9495      defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9496                                  sched.XMM>,
9497                                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9498      defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9499                                  sched.YMM>,
9500                                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9501      defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9502                                  sched.XMM>,
9503                                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9504      defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9505                                  sched.YMM>,
9506                                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9507    }
9508  }
9509
9510  multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9511                         SDNode OpNodeSAE, X86SchedWriteWidths sched> {
        // Half-precision packed ("ph") records in T_MAP6PD. The 512-bit PHZ
        // (HasFP16) gets r/m/mb plus the {sae} rb form; the 128/256-bit records
        // (HasFP16 + HasVLX) get r/m/mb only.
9512    let Predicates = [HasFP16] in
9513    defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9514                avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9515                T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9516    let Predicates = [HasFP16, HasVLX] in {
9517      defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9518                                       EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9519      defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9520                                       EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9521    }
9522  }
9523  let Predicates = [HasERI] in {
       // Packed 512-bit ERI instructions (ps and pd, each with a {sae} form):
       // rsqrt28 (0xCC), rcp28 (0xCA) and exp2 (0xC8).
9524   defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9525                              SchedWriteFRsqrt>, EVEX;
9526   defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9527                              SchedWriteFRcp>, EVEX;
9528   defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9529                              SchedWriteFAdd>, EVEX;
9530  }
9531  defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9532                              SchedWriteFRnd>,
      // Packed vgetexp (0x42), defined outside the HasERI block: 512-bit ps/pd
      // from avx512_eri, ph at all widths from avx512_vgetexp_fp16, and the
      // 128/256-bit ps/pd forms from avx512_fp_unaryop_packed.
9533                   avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9534                                       SchedWriteFRnd>,
9535                   avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9536                                            SchedWriteFRnd>, EVEX;
9537
9538  multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9539                                      X86FoldableSchedWrite sched, X86VectorVTInfo _>{
        // rb - register-only packed sqrt taking an explicit rounding-control
        // operand (AVX512RC:$rc), matched as X86fsqrtRnd with $rc as a timm and
        // tagged EVEX_B + EVEX_RC. No Uses/mayRaiseFPException is set inside this
        // multiclass.
9540    let ExeDomain = _.ExeDomain in
9541    defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9542                           (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9543                           (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9544                           EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9545  }
9546
// Packed square root in three operand forms: register (r), full-vector memory
// (m) and broadcast scalar memory (mb). Each form is built with
// AVX512_maskable_split, which is handed two dags: one using any_fsqrt and one
// using fsqrt. All three forms list MXCSR in Uses and set mayRaiseFPException.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold
//   _         - vector type info
9547multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9548                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9549  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
9550  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9551                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9552                         (_.VT (any_fsqrt _.RC:$src)),
9553                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9554                         Sched<[sched]>;
  // Full-vector memory source, loaded through _.LdFrag.
9555  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9556                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9557                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9558                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9559                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Scalar memory source loaded through _.BroadcastLdFrag; the asm operand is
  // suffixed with _.BroadcastStr and the record carries EVEX_B.
9560  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9561                          (ins _.ScalarMemOp:$src), OpcodeStr,
9562                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9563                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9564                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9565                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9566  }
9567}
9568
// Packed square root for every element type and vector width.
//   ph : 512-bit needs HasFP16; 128/256-bit need HasFP16 and HasVLX.
//   ps/pd : 512-bit forms have no extra predicate here; 128/256-bit forms
//           need HasVLX.
// The mnemonic is OpcodeStr with the element-type suffix appended, and the
// scheduling class is picked from sched by element type and width.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  // Half precision.
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_sqrt_packed<opc, OpcodeStr#"ph", sched.PH.ZMM,
                                  v32f16_info>,
               EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ph", sched.PH.XMM,
                                     v8f16x_info>,
                  EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ph", sched.PH.YMM,
                                     v16f16x_info>,
                  EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }

  // Single and double precision, 512-bit.
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps", sched.PS.ZMM,
                                v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd", sched.PD.ZMM,
                                v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

  // Single and double precision, 128/256-bit: only with AVX512VL.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps", sched.PS.XMM,
                                     v4f32x_info>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps", sched.PS.YMM,
                                     v8f32x_info>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd", sched.PD.XMM,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd", sched.PD.YMM,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
9606
// Rounding-control packed square root. Only 512-bit records are defined:
// ph (gated on HasFP16), ps and pd. Each instantiates
// avx512_sqrt_packed_round with the matching element-type suffix.
let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ph", sched.PH.ZMM,
                                        v32f16_info>,
               EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }

  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", sched.PS.ZMM,
                                      v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", sched.PD.ZMM,
                                      v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
9621
// Scalar square root.
//
// Defines the vector-register ("_Int") forms r_Int, m_Int and the
// rounding-control rb_Int, plus codegen-only FRC forms (r, m) whose pattern
// lists are empty. The Pat records at the end select any_fsqrt on _.EltVT to
// the FRC forms, which are looked up by name as Name#Zr and Name#Zm.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   sched     - scheduling class (memory forms use Folded / ReadAfterFold)
//   _         - scalar/vector type info
//   Name      - record-name prefix used to look up the Zr / Zm instructions
//   prd       - predicate applied to every record (defaults to HasAVX512)
9622multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9623                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9624  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
    // Vector-register form: X86fsqrts on ($src1, $src2).
9625    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9626                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9627                         "$src2, $src1", "$src1, $src2",
9628                         (X86fsqrts (_.VT _.RC:$src1),
9629                                    (_.VT _.RC:$src2))>,
9630                         Sched<[sched]>, SIMD_EXC;
    // Same, with $src2 taken from memory through _.ScalarIntMemFrags.
9631    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9632                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9633                         "$src2, $src1", "$src1, $src2",
9634                         (X86fsqrts (_.VT _.RC:$src1),
9635                                    (_.ScalarIntMemFrags addr:$src2))>,
9636                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    // Rounding-control form: X86fsqrtRnds with an explicit $rc operand. It
    // lists MXCSR in Uses but, unlike its siblings, does not use SIMD_EXC.
9637    let Uses = [MXCSR] in
9638    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9639                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9640                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9641                         (X86fsqrtRnds (_.VT _.RC:$src1),
9642                                     (_.VT _.RC:$src2),
9643                                     (i32 timm:$rc))>,
9644                         EVEX_B, EVEX_RC, Sched<[sched]>;
9645
    // Codegen-only forms on the scalar register class (_.FRC). They have no
    // patterns of their own; selection happens through the Pat records below.
9646    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9647      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9648                (ins _.FRC:$src1, _.FRC:$src2),
9649                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9650                Sched<[sched]>, SIMD_EXC;
9651      let mayLoad = 1 in
9652        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9653                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9654                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9655                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9656    }
9657  }
9658
  // Register any_fsqrt: the first source operand is fed IMPLICIT_DEF.
9659  let Predicates = [prd] in {
9660    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9661              (!cast<Instruction>(Name#Zr)
9662                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9663  }
9664
  // The load-folding pattern is only enabled together with OptForSize.
9665  let Predicates = [prd, OptForSize] in {
9666    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9667              (!cast<Instruction>(Name#Zm)
9668                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9669  }
9670}
9671
// Scalar square root for each element type: sh (passes HasFP16 as the
// predicate), ss and sd (both left on avx512_sqrt_scalar's default predicate).
// The Name argument handed down is NAME with "SH"/"SS"/"SD" appended.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SHZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "sh"), sched.PH.Scl,
                                f16x_info, !strconcat(NAME, "SH"), HasFP16>,
             EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;

  defm SSZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "ss"), sched.PS.Scl,
                                f32x_info, !strconcat(NAME, "SS")>,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;

  defm SDZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "sd"), sched.PD.Scl,
                                f64x_info, !strconcat(NAME, "SD")>,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
9681
// Packed VSQRT (opcode 0x51): the regular forms plus the rounding-control
// forms, both scheduled with SchedWriteFSqrtSizes.
9682defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9683             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9684
// Scalar VSQRT (sh/ss/sd) at the same opcode, with VEX_LIG applied.
9685defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9686
// Scalar round-scale with an 8-bit immediate operand ($src3).
//
// r_Int and m_Int select X86RndScales; rb_Int selects X86RndScalesSAE and
// prints "{sae}" in its asm string. The codegen-only FRC forms (r, m) have
// empty pattern lists and are selected by the X86any_VRndScale Pat records at
// the end, which feed IMPLICIT_DEF as the first source operand.
//
// NOTE(review): the FRC defs and both Pat groups are pinned to HasAVX512 here
// rather than taking a predicate parameter, although the VRNDSCALESHZ
// instantiation is wrapped in HasFP16 — confirm this is intended.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   sched     - scheduling class (memory forms use Folded / ReadAfterFold)
//   _         - scalar/vector type info
9687multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9688                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9689  let ExeDomain = _.ExeDomain in {
9690  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9691                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9692                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9693                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9694                           (i32 timm:$src3)))>,
9695                           Sched<[sched]>, SIMD_EXC;
9696
  // {sae} form: lists MXCSR in Uses and carries EVEX_B, without SIMD_EXC.
9697  let Uses = [MXCSR] in
9698  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9699                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9700                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9701                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9702                         (i32 timm:$src3)))>, EVEX_B,
9703                         Sched<[sched]>;
9704
9705  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9706                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9707                         OpcodeStr,
9708                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9709                         (_.VT (X86RndScales _.RC:$src1,
9710                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9711                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9712
  // Codegen-only forms on the scalar register class; no patterns attached.
9713  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9714    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9715               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9716               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9717               []>, Sched<[sched]>, SIMD_EXC;
9718
9719    let mayLoad = 1 in
9720      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9721                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9722                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9723                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9724  }
9725  }
9726
  // Register X86any_VRndScale selects NAME#r.
9727  let Predicates = [HasAVX512] in {
9728    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9729              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9730               _.FRC:$src1, timm:$src2))>;
9731  }
9732
  // The load-folding pattern (NAME#m) is only enabled together with OptForSize.
9733  let Predicates = [HasAVX512, OptForSize] in {
9734    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9735              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9736               addr:$src1, timm:$src2))>;
9737  }
9738}
9739
// Scalar round-scale instantiations of avx512_rndscale_scalar, all scheduled
// with SchedWriteFRnd.Scl.
// sh: opcode 0x0A on f16x_info, wrapped in HasFP16; uses AVX512PSIi8Base, TA.
9740let Predicates = [HasFP16] in
9741defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9742                                           SchedWriteFRnd.Scl, f16x_info>,
9743                                           AVX512PSIi8Base, TA, EVEX_4V,
9744                                           EVEX_CD8<16, CD8VT1>;
9745
// ss: opcode 0x0A on f32x_info; uses AVX512AIi8Base and VEX_LIG.
9746defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9747                                           SchedWriteFRnd.Scl, f32x_info>,
9748                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
9749                                           EVEX_CD8<32, CD8VT1>;
9750
// sd: opcode 0x0B on f64x_info; as ss, plus VEX_W.
9751defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9752                                           SchedWriteFRnd.Scl, f64x_info>,
9753                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9754                                           EVEX_CD8<64, CD8VT1>;
9755
// Patterns that map a masked scalar operation onto the masked "_Int"
// instruction forms. Both patterns match
//   Move($src1, scalar_to_vector(X86selects_mask(Mask, OpNode(elt0 $src2), X)))
// and differ only in X:
//   X = element 0 of $dst -> "V"#OpcPrefix#r_Intk  ($dst passed as first operand)
//   X = ZeroFP            -> "V"#OpcPrefix#r_Intkz
//   OpNode        - scalar operation applied to element 0 of $src2
//   OpcPrefix     - instruction name without the leading "V" and form suffix
//   Move          - node that merges the scalar result into $src1
//   Mask          - dag matched as the select condition
//   _             - vector type info
//   ZeroFP        - leaf matched as the zero alternative
//   OutMask       - dag emitted as the instruction's mask operand
//   BasePredicate - predicate guarding both patterns
9756multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9757                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9758                                dag OutMask, Predicate BasePredicate> {
9759  let Predicates = [BasePredicate] in {
9760    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9761               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9762               (extractelt _.VT:$dst, (iPTR 0))))),
9763              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9764               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9765
9766    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9767               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9768               ZeroFP))),
9769              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9770               OutMask, _.VT:$src2, _.VT:$src1)>;
9771  }
9772}
9773
// Masked scalar fsqrt patterns for sh / ss / sd. In each case the matched mask
// is a GR32 value truncated to i8 and turned into a v1i1 via scalar_to_vector;
// the emitted mask operand is $mask copied into the VK1WM register class.
// The sh variant is guarded by HasFP16, the other two by HasAVX512.
9774defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9775                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9776                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9777defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9778                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9779                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9780defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9781                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9782                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9783
9784
9785//-------------------------------------------------
9786// Integer truncate and extend operations
9787//-------------------------------------------------
9788
9789// PatFrags that contain a select and a truncate op. They take operands in the
9790// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9791// either to the multiclasses.
// Operands are (src, src0, mask): the result is vselect_mask(mask, T(src), src0)
// where T is trunc, X86vtruncs or X86vtruncus respectively.
9792def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9793                           (vselect_mask node:$mask,
9794                                         (trunc node:$src), node:$src0)>;
9795def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9796                            (vselect_mask node:$mask,
9797                                          (X86vtruncs node:$src), node:$src0)>;
9798def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9799                             (vselect_mask node:$mask,
9800                                           (X86vtruncus node:$src), node:$src0)>;
9801
// Instruction forms shared by all vector truncates for one source/destination
// type pair.
//   rr   - register to register, selects OpNode
//   rrk  - masked, selects MaskNode(src, src0, mask) with $src0 tied to $dst
//   rrkz - masked, selects MaskNode(src, DestInfo.ImmAllZerosV, mask)
//   mr   - store to memory, no pattern
//   mrk  - masked store to memory, no pattern, marked NotMemoryFoldable
// The mask register class comes from SrcInfo (SrcInfo.KRCWM); the execution
// domain comes from DestInfo.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   OpNode    - unmasked truncate node
//   MaskNode  - masked truncate operator taking (src, passthru, mask)
//   sched     - scheduling class (store forms use sched.Folded)
//   SrcInfo   - source vector type info
//   DestInfo  - destination vector type info
//   x86memop  - memory operand used by the store forms
9802multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9803                              SDPatternOperator MaskNode,
9804                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9805                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9806  let ExeDomain = DestInfo.ExeDomain in {
9807  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9808             (ins SrcInfo.RC:$src),
9809             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9810             [(set DestInfo.RC:$dst,
9811                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9812             EVEX, Sched<[sched]>;
9813  let Constraints = "$src0 = $dst" in
9814  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9815             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9816             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9817             [(set DestInfo.RC:$dst,
9818                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9819                             (DestInfo.VT DestInfo.RC:$src0),
9820                             SrcInfo.KRCWM:$mask))]>,
9821             EVEX, EVEX_K, Sched<[sched]>;
9822  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9823             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9824             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9825             [(set DestInfo.RC:$dst,
9826                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9827                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9828             EVEX, EVEX_KZ, Sched<[sched]>;
9829  }
9830
  // Store forms carry no patterns, so mayStore/hasSideEffects are set by hand.
9831  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9832    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9833               (ins x86memop:$dst, SrcInfo.RC:$src),
9834               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9835               EVEX, Sched<[sched.Folded]>;
9836
9837    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9838               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9839               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9840               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9841  }//mayStore = 1, hasSideEffects = 0
9842}
9843
// Selection patterns for the truncating-store instruction forms. The
// instruction is looked up by name as Name # SrcInfo.ZSuffix # "mr" (plain
// store, matched by truncFrag) or "mrk" (masked store, matched by mtruncFrag).
//   SrcInfo    - source vector type info (supplies VT, RC, KRCWM, ZSuffix)
//   truncFrag  - fragment matching (value, address)
//   mtruncFrag - fragment matching (value, address, mask)
//   Name       - instruction name prefix
9844multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9845                                    PatFrag truncFrag, PatFrag mtruncFrag,
9846                                    string Name> {
9847
9848  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9849            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9850                                    addr:$dst, SrcInfo.RC:$src)>;
9851
9852  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9853                        SrcInfo.KRCWM:$mask),
9854            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9855                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9856}
9857
// Instantiates a truncate at all three vector widths. Each width combines
// avx512_trunc_common (instruction forms) with avx512_trunc_mr_lowering
// (store patterns), using the per-width node, mask node, destination info and
// memory operand passed in.
//   Z128, Z256 - guarded by [HasVLX, prd]
//   Z          - guarded by [prd]
//   opc, OpcodeStr   - opcode byte and mnemonic
//   OpNode128/256/512, MaskNode128/256/512 - per-width unmasked/masked nodes
//   sched            - scheduling class shared by all widths
//   VTSrcInfo        - source type info for all three widths
//   DestInfoZ128/Z256/Z, x86memopZ128/Z256/Z - per-width destination info and
//                      store operand
//   truncFrag, mtruncFrag - plain / masked truncating-store fragments
//   prd              - base predicate (defaults to HasAVX512)
9858multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9859                        SDNode OpNode256, SDNode OpNode512,
9860                        SDPatternOperator MaskNode128,
9861                        SDPatternOperator MaskNode256,
9862                        SDPatternOperator MaskNode512,
9863                        X86FoldableSchedWrite sched,
9864                        AVX512VLVectorVTInfo VTSrcInfo,
9865                        X86VectorVTInfo DestInfoZ128,
9866                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9867                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9868                        X86MemOperand x86memopZ, PatFrag truncFrag,
9869                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9870
9871  let Predicates = [HasVLX, prd] in {
9872    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9873                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9874                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9875                                         mtruncFrag, NAME>, EVEX_V128;
9876
9877    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9878                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9879                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9880                                         mtruncFrag, NAME>, EVEX_V256;
9881  }
9882  let Predicates = [prd] in
9883    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9884                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9885                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9886                                         mtruncFrag, NAME>, EVEX_V512;
9887}
9888
// i64 -> i8 truncate (source avx512vl_i64_info). All three widths use
// InVecNode / InVecMaskNode and produce v16i8x_info; the store operands are
// i16mem, i32mem and i64mem for the 128/256/512-bit sources.
9889multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9890                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9891                           PatFrag MaskedStoreNode, SDNode InVecNode,
9892                           SDPatternOperator InVecMaskNode> {
9893  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9894                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9895                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9896                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9897                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9898}
9899
// i64 -> i16 truncate (source avx512vl_i64_info). The 128- and 256-bit sources
// use InVecNode / InVecMaskNode; the 512-bit source uses OpNode / MaskNode.
// Every width produces v8i16x_info; store operands are i32mem, i64mem, i128mem.
9900multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9901                           SDPatternOperator MaskNode,
9902                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9903                           PatFrag MaskedStoreNode, SDNode InVecNode,
9904                           SDPatternOperator InVecMaskNode> {
9905  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9906                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9907                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9908                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9909                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9910}
9911
// i64 -> i32 truncate (source avx512vl_i64_info). Only the 128-bit source uses
// InVecNode / InVecMaskNode; 256- and 512-bit sources use OpNode / MaskNode.
// Destinations are v4i32x_info, v4i32x_info and v8i32x_info; store operands
// are i64mem, i128mem and i256mem.
9912multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9913                           SDPatternOperator MaskNode,
9914                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9915                           PatFrag MaskedStoreNode, SDNode InVecNode,
9916                           SDPatternOperator InVecMaskNode> {
9917  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9918                          InVecMaskNode, MaskNode, MaskNode, sched,
9919                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9920                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9921                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9922}
9923
// i32 -> i8 truncate (source avx512vl_i32_info). The 128- and 256-bit sources
// use InVecNode / InVecMaskNode; the 512-bit source uses OpNode / MaskNode.
// Every width produces v16i8x_info; store operands are i32mem, i64mem, i128mem.
9924multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9925                           SDPatternOperator MaskNode,
9926                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9927                           PatFrag MaskedStoreNode, SDNode InVecNode,
9928                           SDPatternOperator InVecMaskNode> {
9929  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9930                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9931                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9932                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9933                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9934}
9935
// i32 -> i16 truncate (source avx512vl_i32_info). Only the 128-bit source uses
// InVecNode / InVecMaskNode; 256- and 512-bit sources use OpNode / MaskNode.
// Destinations are v8i16x_info, v8i16x_info and v16i16x_info; store operands
// are i64mem, i128mem and i256mem.
9936multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9937                           SDPatternOperator MaskNode,
9938                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9939                           PatFrag MaskedStoreNode, SDNode InVecNode,
9940                           SDPatternOperator InVecMaskNode> {
9941  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9942                          InVecMaskNode, MaskNode, MaskNode, sched,
9943                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9944                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9945                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9946}
9947
// i16 -> i8 truncate (source avx512vl_i16_info), with HasBWI passed as the
// base predicate. Only the 128-bit source uses InVecNode / InVecMaskNode; 256-
// and 512-bit sources use OpNode / MaskNode. Destinations are v16i8x_info,
// v16i8x_info and v32i8x_info; store operands are i64mem, i128mem and i256mem.
// NOTE(review): EVEX_CD8 is given element size 16 here, whereas the other
// byte-destination wrappers (qb, db) pass 8 — confirm this is intentional.
9948multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9949                           SDPatternOperator MaskNode,
9950                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9951                           PatFrag MaskedStoreNode, SDNode InVecNode,
9952                           SDPatternOperator InVecMaskNode> {
9953  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9954                          InVecMaskNode, MaskNode, MaskNode, sched,
9955                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9956                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9957                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9958}
9959
// Vector truncate instantiations. Each source/destination pair gets three
// records: one built on trunc / X86vtrunc, one on X86vtruncs ("s" mnemonic)
// and one on X86vtruncus ("us" mnemonic), paired with the matching
// truncstore / masked_truncstore fragments. All use WriteVPMOV256.

// i64 -> i8. avx512_trunc_qb takes only the in-vector nodes.
9960defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9961                                  WriteVPMOV256, truncstorevi8,
9962                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9963defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9964                                  WriteVPMOV256, truncstore_s_vi8,
9965                                  masked_truncstore_s_vi8, X86vtruncs,
9966                                  X86vmtruncs>;
9967defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9968                                  WriteVPMOV256, truncstore_us_vi8,
9969                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9970
// i64 -> i16.
9971defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9972                                  WriteVPMOV256, truncstorevi16,
9973                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9974defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9975                                  WriteVPMOV256, truncstore_s_vi16,
9976                                  masked_truncstore_s_vi16, X86vtruncs,
9977                                  X86vmtruncs>;
9978defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9979                                  select_truncus, WriteVPMOV256,
9980                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9981                                  X86vtruncus, X86vmtruncus>;
9982
// i64 -> i32.
9983defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9984                                  WriteVPMOV256, truncstorevi32,
9985                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9986defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9987                                  WriteVPMOV256, truncstore_s_vi32,
9988                                  masked_truncstore_s_vi32, X86vtruncs,
9989                                  X86vmtruncs>;
9990defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9991                                  select_truncus, WriteVPMOV256,
9992                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9993                                  X86vtruncus, X86vmtruncus>;
9994
// i32 -> i8.
9995defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9996                                  WriteVPMOV256, truncstorevi8,
9997                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9998defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9999                                  WriteVPMOV256, truncstore_s_vi8,
10000                                  masked_truncstore_s_vi8, X86vtruncs,
10001                                  X86vmtruncs>;
10002defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
10003                                  select_truncus, WriteVPMOV256,
10004                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10005                                  X86vtruncus, X86vmtruncus>;
10006
// i32 -> i16.
10007defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
10008                                  WriteVPMOV256, truncstorevi16,
10009                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10010defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
10011                                  WriteVPMOV256, truncstore_s_vi16,
10012                                  masked_truncstore_s_vi16, X86vtruncs,
10013                                  X86vmtruncs>;
10014defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10015                                  select_truncus, WriteVPMOV256,
10016                                  truncstore_us_vi16, masked_truncstore_us_vi16,
10017                                  X86vtruncus, X86vmtruncus>;
10018
// i16 -> i8 (avx512_trunc_wb passes HasBWI as the base predicate).
10019defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10020                                  WriteVPMOV256, truncstorevi8,
10021                                  masked_truncstorevi8, X86vtrunc,
10022                                  X86vmtrunc>;
10023defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10024                                  WriteVPMOV256, truncstore_s_vi8,
10025                                  masked_truncstore_s_vi8, X86vtruncs,
10026                                  X86vmtruncs>;
10027defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10028                                  select_truncus, WriteVPMOV256,
10029                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10030                                  X86vtruncus, X86vmtruncus>;
10031
// 256-bit truncates under NoVLX: the 256-bit source is inserted into an
// IMPLICIT_DEF 512-bit value at sub_ymm, truncated with the 512-bit (Z)
// register instruction, and the result is taken from sub_xmm.
10032let Predicates = [HasAVX512, NoVLX] in {
10033def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10034         (v8i16 (EXTRACT_SUBREG
10035                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10036                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
10037def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10038         (v4i32 (EXTRACT_SUBREG
10039                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10040                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
10041}
10042
// Same widening scheme for v16i16 -> v16i8, which uses VPMOVWBZrr and is
// therefore guarded by HasBWI instead of HasAVX512.
10043let Predicates = [HasBWI, NoVLX] in {
10044def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10045         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10046                                            VR256X:$src, sub_ymm))), sub_xmm))>;
10047}
10048
10049// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps a masked-truncate node directly onto the masked register forms:
//   OpNode(src, src0, mask)          -> InstrName#"rrk"  (src0 as passthru)
//   OpNode(src, ImmAllZerosV, mask)  -> InstrName#"rrkz"
//   InstrName - full instruction name without the form suffix
//   OpNode    - masked truncate node taking (src, passthru, mask)
//   DestInfo  - destination vector type info
//   SrcInfo   - source vector type info (also supplies the mask class)
10050multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10051                           X86VectorVTInfo DestInfo,
10052                           X86VectorVTInfo SrcInfo> {
10053  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10054                                 DestInfo.RC:$src0,
10055                                 SrcInfo.KRCWM:$mask)),
10056            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10057                                                 SrcInfo.KRCWM:$mask,
10058                                                 SrcInfo.RC:$src)>;
10059
10060  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10061                                 DestInfo.ImmAllZerosV,
10062                                 SrcInfo.KRCWM:$mask)),
10063            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10064                                                  SrcInfo.RC:$src)>;
10065}
10066
// mtrunc_lowering instantiations for the X86vmtrunc / X86vmtruncs /
// X86vmtruncus nodes.
// 256-bit v8i32 -> v8i16, guarded by HasVLX.
10067let Predicates = [HasVLX] in {
10068defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10069defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10070defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10071}
10072
// 512-bit sources, guarded by HasAVX512: v16i32 -> v16i16, v16i32 -> v16i8 and
// v8i64 -> v8i16.
10073let Predicates = [HasAVX512] in {
10074defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10075defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10076defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10077
10078defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10079defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10080defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10081
10082defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10083defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10084defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10085}
10086
// Register (rr) and memory (rm) forms shared by the vector extend
// instructions, both built with AVX512_maskable on DestInfo.
//   rr - selects OpNode applied to a SrcInfo register
//   rm - selects LdFrag applied to the address; scheduled with sched.Folded
//   opc, OpcodeStr - opcode byte and mnemonic
//   sched          - scheduling class
//   DestInfo       - destination vector type info (also supplies ExeDomain)
//   SrcInfo        - source vector type info
//   x86memop       - memory operand for the rm form
//   LdFrag         - load fragment matched by the rm form
//   OpNode         - extend node matched by the rr form
10087multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10088              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10089              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10090  let ExeDomain = DestInfo.ExeDomain in {
10091  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10092                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10093                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10094                  EVEX, Sched<[sched]>;
10095
10096  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10097                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10098                  (DestInfo.VT (LdFrag addr:$src))>,
10099                EVEX, Sched<[sched.Folded]>;
10100  }
10101}
10102
// i8 -> i16 extension forms. The 128-bit form selects InVecNode; the 256- and
// 512-bit forms select OpNode. LdFrag defaults to <ExtTy>extloadvi8.
// Memory operands: Z128 = i64mem, Z256 = i128mem, Z = i256mem.
// All forms require HasBWI; Z128/Z256 additionally require HasVLX.
multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX,
                           X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, schedX, v8i16x_info,
                                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i16x_info,
                                    v16i8x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasBWI] in
  defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v32i16_info,
                               v32i8x_info, i256mem, LdFrag, OpNode>,
           EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
10122
// i8 -> i32 extension forms. The 128- and 256-bit forms select InVecNode (the
// source register class is always v16i8x_info's); the 512-bit form selects
// OpNode. LdFrag defaults to <ExtTy>extloadvi8.
// Memory operands: Z128 = i32mem, Z256 = i64mem, Z = i128mem.
multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX,
                           X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
                                    v16i8x_info, i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
                                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
                               v16i8x_info, i128mem, LdFrag, OpNode>,
           EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
10142
// i8 -> i64 extension forms. Unlike its siblings this multiclass takes no
// OpNode: every width selects InVecNode with v16i8x_info as the source.
// LdFrag defaults to <ExtTy>extloadvi8.
// Memory operands: Z128 = i16mem, Z256 = i32mem, Z = i64mem.
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX,
                           X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                    v16i8x_info, i16mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                    v16i8x_info, i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                               v16i8x_info, i64mem, LdFrag, InVecNode>,
           EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
}
10162
// i16 -> i32 extension forms. The 128-bit form selects InVecNode; the 256- and
// 512-bit forms select OpNode. LdFrag defaults to <ExtTy>extloadvi16.
// Memory operands: Z128 = i64mem, Z256 = i128mem, Z = i256mem.
multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX,
                           X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
                                    v8i16x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
                                    v8i16x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
                               v16i16x_info, i256mem, LdFrag, OpNode>,
           EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
}
10182
// i16 -> i64 extension forms. The 128- and 256-bit forms select InVecNode
// (source is always v8i16x_info); the 512-bit form selects OpNode.
// LdFrag defaults to <ExtTy>extloadvi16.
// Memory operands: Z128 = i32mem, Z256 = i64mem, Z = i128mem.
multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX,
                           X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                    v8i16x_info, i32mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                    v8i16x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                               v8i16x_info, i128mem, LdFrag, OpNode>,
           EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
}
10202
// i32 -> i64 extension forms. The 128-bit form selects InVecNode; the 256- and
// 512-bit forms select OpNode. LdFrag defaults to <ExtTy>extloadvi32.
// Memory operands: Z128 = i64mem, Z256 = i128mem, Z = i256mem.
// Note: unlike the byte/word source multiclasses, no VEX_WIG is attached here.
multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX,
                           X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                    v4i32x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                    v4i32x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in
  defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                               v8i32x_info, i256mem, LdFrag, OpNode>,
           EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
10223
// Zero-extending moves: opcodes 0x30-0x35, zext / zext_invec, ExtTy "z".
defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;

// Sign-extending moves: opcodes 0x20-0x25, sext / sext_invec, ExtTy "s".
defm VPMOVSXBW : avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXBD : avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXBQ : avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXWD : avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXWQ : avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXDQ : avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
10237
10238
// Full-vector-load extension patterns, kept in a separate multiclass because
// any-extend variants of them are needed as well (aext_vector_inreg is
// currently legalized to zext_vector_inreg). Each pattern folds
// (ExtOp (loadvNiM addr)) into the matching <OpcPrefix>..rm instruction.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit results.
  let Predicates = [HasVLX, HasBWI] in
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit results.
  let Predicates = [HasBWI] in
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;

  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
10274
// Extends AVX512_pmovx_patterns_base with the "in-vector" extension patterns:
// InVecOp applied to a 128-bit vector that was built from a narrow scalar
// load (scalar_to_vector of loadi32/loadi64/loadf64, or an X86vzload). Each
// such DAG is folded into the memory form of the matching <OpcPrefix>
// instruction, so only the bytes the instruction actually consumes are loaded.
//
// For the 64-bit loads both the integer (v2i64/loadi64) and the floating-point
// (v2f64/loadf64) spellings are matched. The vector type wrapped around a
// loadf64 must be v2f64: an f64 scalar cannot form a v2i64 scalar_to_vector
// node, so a v2i64 spelling would never match.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // f64 load: the scalar_to_vector result is v2f64, as in the 128-bit and
  // 512-bit loadf64 patterns.
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // f64 load: the scalar_to_vector result is v2f64 (see above).
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}
10345
// Instantiate the load-folding extension patterns for the sign-extending
// (sext / sext_invec) and zero-extending (zext / zext_invec) instructions.
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10348
// Truncating v16i16 to v16i8 is not directly possible without BWI, and DAG
// combine merges ext+trunc so aggressively that the DAG cannot be legalized
// to this shape ahead of time. Select it here instead: widen the words to
// v16i32 with VPMOVZXWD, then narrow the dwords to bytes with VPMOVDB.
let Predicates = [HasAVX512, NoBWI] in {
  // Register source.
  def : Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;

  // Memory source: the load is folded into the widening step.
  def : Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
10358
10359//===----------------------------------------------------------------------===//
10360// GATHER - SCATTER Operations
10361
10362// FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction (rm). No selection pattern is attached ([]).
//   outs: $dst and a written-back mask ($mask_wb).
//   ins : $src1 (tied to $dst), $mask (tied to $mask_wb), memop:$src2.
// $dst is marked @earlyclobber. MaskRC defaults to the VT's write-mask class
// and can be overridden by the caller.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0,
      Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    OpcodeStr # _.Suffix #
                      "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>,
           EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}
10374
// Gather family used below with the 64-bit element VT infos (SUFF "PD"/"Q").
// For every vector width it emits a "d" form (opcode dopc) and a "q" form
// (opcode qopc); all forms carry VEX_W. The 512-bit forms are unconditional
// here; the 256- and 128-bit forms require HasVLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  defm NAME#D#SUFF#Z : avx512_gather<dopc, OpcodeStr#"d", _.info512, vy512xmem>,
                       EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z : avx512_gather<qopc, OpcodeStr#"q", _.info512, vz512mem>,
                       EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vx256xmem>,
                            EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256 : avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                          vy256xmem>,
                            EVEX_V256, VEX_W;
    defm NAME#D#SUFF#Z128 : avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>,
                            EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx128xmem>,
                            EVEX_V128, VEX_W;
  }
}
10392
// Gather family used below with the 32-bit element VT infos (SUFF "PS"/"D").
// For every vector width it emits a "d" form (opcode dopc) and a "q" form
// (opcode qopc). The "q" forms use a narrower data VT than their EVEX vector
// length: info256 for Z, info128 for Z256 and Z128. The Z128 "q" form also
// overrides the mask register class with VK2WM.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  defm NAME#D#SUFF#Z : avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                       EVEX_V512;
  defm NAME#Q#SUFF#Z : avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                       EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vy256xmem>,
                            EVEX_V256;
    defm NAME#Q#SUFF#Z256 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vy128xmem>,
                            EVEX_V256;
    defm NAME#D#SUFF#Z128 : avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>,
                            EVEX_V128;
    defm NAME#Q#SUFF#Z128 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx64xmem, VK2WM>,
                            EVEX_V128;
  }
}
10410
10411
// Floating-point gathers: opcodes 0x92 ("d" forms) and 0x93 ("q" forms).
defm VGATHER
    : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
      avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

// Integer gathers: opcodes 0x90 ("d" forms) and 0x91 ("q" forms).
defm VPGATHER
    : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
      avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10417
// One scatter instruction (mr). No selection pattern is attached ([]).
//   outs: the written-back mask ($mask_wb).
//   ins : memop:$dst, $mask (tied to $mask_wb), $src.
// MaskRC defaults to the VT's write-mask class and can be overridden.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let ExeDomain = _.ExeDomain, mayStore = 1, hasSideEffects = 0,
      Constraints = "$mask = $mask_wb" in
  def mr : AVX5128I<opc, MRMDestMem,
                    (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    OpcodeStr # _.Suffix #
                      "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                    []>,
           EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteStore]>;
}
10431
// Scatter family used below with the 64-bit element VT infos (SUFF "PD"/"Q").
// Mirrors avx512_gather_q_pd: a "d" form (dopc) and a "q" form (qopc) per
// vector width, all with VEX_W; the 256/128-bit forms require HasVLX.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  defm NAME#D#SUFF#Z : avx512_scatter<dopc, OpcodeStr#"d", _.info512, vy512xmem>,
                       EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z : avx512_scatter<qopc, OpcodeStr#"q", _.info512, vz512mem>,
                       EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                           vx256xmem>,
                            EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256 : avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                           vy256xmem>,
                            EVEX_V256, VEX_W;
    defm NAME#D#SUFF#Z128 : avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                           vx128xmem>,
                            EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vx128xmem>,
                            EVEX_V128, VEX_W;
  }
}
10449
// Scatter family used below with the 32-bit element VT infos (SUFF "PS"/"D").
// Mirrors avx512_gather_d_ps: the "q" forms use a narrower data VT than their
// EVEX vector length (info256 for Z, info128 for Z256 and Z128), and the Z128
// "q" form overrides the mask register class with VK2WM.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  defm NAME#D#SUFF#Z : avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                       EVEX_V512;
  defm NAME#Q#SUFF#Z : avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                       EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#D#SUFF#Z256 : avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                           vy256xmem>,
                            EVEX_V256;
    defm NAME#Q#SUFF#Z256 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vy128xmem>,
                            EVEX_V256;
    defm NAME#D#SUFF#Z128 : avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                           vx128xmem>,
                            EVEX_V128;
    defm NAME#Q#SUFF#Z128 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vx64xmem, VK2WM>,
                            EVEX_V128;
  }
}
10467
// Floating-point scatters: opcodes 0xA2 ("d" forms) and 0xA3 ("q" forms).
defm VSCATTER
    : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
      avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: opcodes 0xA0 ("d" forms) and 0xA1 ("q" forms).
defm VPSCATTER
    : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
      avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10473
10474// prefetch
// One masked gather/scatter prefetch instruction (m). It has no outputs and
// no selection pattern, takes a mask (KRC) and a memory operand, requires
// HasPFI, and is flagged as both mayLoad and mayStore. The ModRM format F is
// supplied by the caller.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   OpcodeStr # "\t{$src {${mask}}|{${mask}}, $src}", []>,
          EVEX, EVEX_K, Sched<[WriteLoad]>;
}
10482
// Prefetch instantiations. Within each group of four: the D* forms use opcode
// 0xC6 and the Q* forms 0xC7; the *PD forms add VEX_W. The groups differ only
// in the ModRM format.

// VGATHERPF0*: MRM1m.
defm VGATHERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                                                    VK16WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                                                    VK8WM, vz256mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                                                    VK8WM, vy512xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                                                    VK8WM, vz512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// VGATHERPF1*: MRM2m.
defm VGATHERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                                                    VK16WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                                                    VK8WM, vz256mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                                                    VK8WM, vy512xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                                                    VK8WM, vz512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// VSCATTERPF0*: MRM5m.
defm VSCATTERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                                                     VK16WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                                                     VK8WM, vz256mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                                                     VK8WM, vy512xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                                                     VK8WM, vz512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// VSCATTERPF1*: MRM6m.
defm VSCATTERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                                                     VK16WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                                                     VK8WM, vz256mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                                                     VK8WM, vy512xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                                                     VK8WM, vz512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10530
// Mask-register -> vector conversion for one vector width: rr takes a
// Vec.KRC mask and selects (sext mask) into a Vec.VT register. The mnemonic
// is OpcodeStr followed by Vec.Suffix.
// Note: the template parameter named Sched is the SchedWrite that is passed
// to the Sched<[...]> mixin below.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec,
                            string OpcodeStr, SchedWrite Sched> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      OpcodeStr # Vec.Suffix # "\t{$src, $dst|$dst, $src}",
                      [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
           EVEX, Sched<[Sched]>;
}
10537
// Instantiates cvt_by_vec_width at all three vector widths for one element
// type. The 512-bit form needs only `prd`; the 256- and 128-bit forms need
// HasVLX as well. Each width uses its own WriteVecMove{Z,Y,X} sched class.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>,
                EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>,
                EVEX_V128;
  }
}
10548
// vpmovm2{b,w,d,q}: the element suffix comes from the VT info, so all four
// share the "vpmovm2" mnemonic stem. Byte/word forms (opcode 0x28) require
// BWI; dword/qword forms (opcode 0x38) require DQI. W and Q add VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info,  "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>,
                VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>,
                VEX_W;
10553
// Vector -> mask-register conversion for one vector width: rr selects
// (X86pcmpgtm allzeros, src), i.e. the "0 > element" compare, into _.KRC.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _,
                                         string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.KRC:$dst,
                            (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
           EVEX, Sched<[WriteMove]>;
}
10560
// Fallback for 128/256-bit vector -> mask conversion when VLX is unavailable:
// the narrow source is inserted into an undefined ExtendInfo-sized register
// (INSERT_SUBREG over IMPLICIT_DEF), the 512-bit <Name>Zrr instruction is run
// on it, and the resulting mask is copied into the narrow VT's mask class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {
  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT
              (COPY_TO_REGCLASS
                (!cast<Instruction>(Name#"Zrr")
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx)),
                _.KRC))>;
}
10573
// Vector -> mask conversion at all three widths for one element type.
//   [prd]          : 512-bit instruction (Z).
//   [prd, HasVLX]  : native 256/128-bit instructions (Z256, Z128).
//   [prd, NoVLX]   : pattern-only fallbacks that reuse the 512-bit instruction
//                    for 256/128-bit inputs (Z256_Alt, Z128_Alt).
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo,
                                         Predicate prd> {
  let Predicates = [prd] in {
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }

  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info128, NAME>;
  }
}
10591
// vpmov{b,w,d,q}2m. Byte/word forms (opcode 0x29) require BWI; dword/qword
// forms (opcode 0x39) require DQI. The W and Q forms add VEX_W.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m", avx512vl_i8_info,
                                              HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m", avx512vl_i16_info,
                                              HasBWI>,
                VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m", avx512vl_i32_info,
                                              HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", avx512vl_i64_info,
                                              HasDQI>,
                VEX_W;
10600
// Sign-extending a mask register to byte/word vectors when DQI is present but
// BWI is not. This cannot be done during lowering because a target-independent
// DAG combine tends to merge sext and trunc, so it is matched here instead:
// expand the mask to dwords with VPMOVM2D, then truncate with VPMOVDB/VPMOVDW.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;

  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

// Same approach for v8i1 -> v8i16, using the 256-bit instructions (needs VLX).
let Predicates = [HasDQI, NoBWI, HasVLX] in
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
          (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10615
10616//===----------------------------------------------------------------------===//
10617// AVX-512 - COMPRESS and EXPAND
10618//
10619
// Compress instructions for one vector width. None of the definitions carry
// an inline selection pattern (null_frag / []).
//   rr  - register destination, via AVX512_maskable.
//   mr  - unmasked store to memory.
//   mrk - masked store to memory (adds the write-mask operand and EVEX_K).
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr,
                                        X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _,
                            (outs _.RC:$dst), (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1", (null_frag)>,
            AVX5128IBase, Sched<[sched]>;

  // This `let` has no braces, so it applies to `mr` only, not to `mrk`.
  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
                    (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    []>,
           EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                     []>,
            EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;
}
10641
// Selection patterns for the compress instructions of one vector width.
// Instruction names are formed as Name # _.ZSuffix # {mrk, rrk, rrkz}.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Masked compressing store -> mrk.
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
               addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  // Register compress with a pass-through value ($src0) -> rrk.
  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;

  // Register compress with an all-zeros pass-through -> rrkz.
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
               _.KRCWM:$mask, _.RC:$src)>;
}
10654
// Compress instructions plus their selection patterns at all three vector
// widths for one element type. Pred defaults to HasAVX512; the 256- and
// 128-bit forms additionally require HasVLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
             compress_by_vec_width_lowering<VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr,
                                             sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr,
                                             sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
10670
// Integer compress uses opcode 0x8B, floating-point compress 0x8A; the 64-bit
// element forms add VEX_W. All are marked NotMemoryFoldable.
// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                                         avx512vl_i32_info>,
                   EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                                         avx512vl_i64_info>,
                   EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                                         avx512vl_f32_info>,
                   EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                                         avx512vl_f64_info>,
                   EVEX, VEX_W, NotMemoryFoldable;
10680
// expand
// Defines the register-source (rr) and memory-source (rm) expand instructions
// for one vector type. Both are created with null_frag, i.e. without an
// attached selection pattern.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase,
            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.MemOp:$src1),
                            OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10695
// Selection patterns that map expanding loads and X86expand onto the
// instructions named Name#_.ZSuffix#{rmkz, rmk, rrk, rrkz}.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Expanding load with an undef pass-through selects the zero-masking load.
  def : Pat<
    (_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
    (!cast<Instruction>(Name#_.ZSuffix#rmkz)
       _.KRCWM:$mask, addr:$src)
  >;

  // Expanding load with an all-zeros pass-through: same zero-masking load.
  def : Pat<
    (_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
    (!cast<Instruction>(Name#_.ZSuffix#rmkz)
       _.KRCWM:$mask, addr:$src)
  >;

  // Expanding load with a register pass-through selects the "rmk" form.
  def : Pat<
    (_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                             (_.VT _.RC:$src0))),
    (!cast<Instruction>(Name#_.ZSuffix#rmk)
       _.RC:$src0, _.KRCWM:$mask, addr:$src)
  >;

  // Register expand with a register pass-through selects the "rrk" form.
  def : Pat<
    (X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
    (!cast<Instruction>(Name#_.ZSuffix#rrk)
       _.RC:$src0, _.KRCWM:$mask, _.RC:$src)
  >;

  // Register expand with an all-zeros pass-through selects the "rrkz" form.
  def : Pat<
    (X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
    (!cast<Instruction>(Name#_.ZSuffix#rrkz)
       _.KRCWM:$mask, _.RC:$src)
  >;
}
10718
// Instantiates the expand instructions and their lowering patterns for each
// vector width of VTInfo: the 512-bit variant (Z) under [Pred], and the
// 256-bit/128-bit variants (Z256/Z128) under [Pred, HasVLX].
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
             expand_by_vec_width_lowering<VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
10734
// Expand instantiations. Opcode 0x89 is used for the integer forms and 0x88
// for the floating-point forms; the 64-bit element variants add VEX_W.
// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd", WriteVarShuffle256,
                                     avx512vl_i32_info>,
                 EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq", WriteVarShuffle256,
                                     avx512vl_i64_info>,
                 EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps", WriteVarShuffle256,
                                     avx512vl_f32_info>,
                 EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd", WriteVarShuffle256,
                                     avx512vl_f64_info>,
                 EVEX, VEX_W;
10744
// Handles instructions of the form
//   reg_vec1 = op(reg_vec, imm)
//              op(mem_vec, imm)
//              op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
// OpNode is used for the unmasked pattern and MaskOpNode for the masked ones.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Register source.
    defm rri : AVX512_maskable_split<
                 opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, i32u8imm:$src2),
                 OpcodeStr#_.Suffix,
                 "$src2, $src1",
                 "$src1, $src2",
                 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
               Sched<[sched]>;

    // Full-vector memory source.
    defm rmi : AVX512_maskable_split<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.MemOp:$src1, i32u8imm:$src2),
                 OpcodeStr#_.Suffix,
                 "$src2, $src1",
                 "$src1, $src2",
                 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                         (i32 timm:$src2)),
                 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                             (i32 timm:$src2))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Broadcast scalar memory source (EVEX_B).
    defm rmbi : AVX512_maskable_split<
                  opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                  OpcodeStr#_.Suffix,
                  "$src2, ${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr#", $src2",
                  (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                          (i32 timm:$src2)),
                  (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                              (i32 timm:$src2))>,
                EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10780
// Handles the unary instruction form  reg_vec1 = op(reg_vec, imm), {sae}
// Only a register-source variant (rrib) is defined; it carries EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    defm rrib : AVX512_maskable<
                  opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, i32u8imm:$src2),
                  OpcodeStr#_.Suffix,
                  "$src2, {sae}, $src1",
                  "$src1, {sae}, $src2",
                  (OpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                EVEX_B, Sched<[sched]>;
  }
}
10794
// Per-width instantiation of the unary packed FP-with-immediate forms.
// The 512-bit variant (Z) additionally gets the {sae} form; the 128-bit and
// 256-bit variants require HasVLX and have no {sae} form.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
            SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
            X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                        sched.ZMM, _.info512>,
             avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                            sched.ZMM, _.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           MaskOpNode, sched.XMM,
                                           _.info128>,
                EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           MaskOpNode, sched.YMM,
                                           _.info256>,
                EVEX_V256;
  }
}
10812
// Handles instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Second source in a register.
    defm rri : AVX512_maskable<
                 opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (i32 timm:$src3))>,
               Sched<[sched]>;

    // Second source loaded as a full vector from memory.
    defm rmi : AVX512_maskable<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT (bitconvert (_.LdFrag addr:$src2))),
                         (i32 timm:$src3))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Second source broadcast from a scalar in memory (EVEX_B).
    defm rmbi : AVX512_maskable<
                  opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr,
                  "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i32 timm:$src3))>,
                EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10844
// Handles instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
// The sources use SrcInfo while the result (and masking) use DestInfo, so the
// source and destination vector types may differ.
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo> {
  let ExeDomain = DestInfo.ExeDomain in {
    // Second source in a register.
    defm rri : AVX512_maskable<
                 opc, MRMSrcReg, DestInfo,
                 (outs DestInfo.RC:$dst),
                 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                      (SrcInfo.VT SrcInfo.RC:$src2),
                                      (i8 timm:$src3)))>,
               Sched<[sched]>;

    // Second source loaded from memory.
    defm rmi : AVX512_maskable<
                 opc, MRMSrcMem, DestInfo,
                 (outs DestInfo.RC:$dst),
                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (DestInfo.VT
                   (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                           (SrcInfo.VT
                             (bitconvert (SrcInfo.LdFrag addr:$src2))),
                           (i8 timm:$src3)))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10868
// Handles instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// The register and memory forms come from avx512_3Op_rm_imm8 (with the same
// type info used for source and destination); this multiclass adds the
// broadcast form.
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>
    : avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _> {

  let ExeDomain = _.ExeDomain in {
    defm rmbi : AVX512_maskable<
                  opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr,
                  "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i8 timm:$src3))>,
                EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10886
// Handles scalar instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Second source in a register.
    defm rri : AVX512_maskable_scalar<
                 opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (i32 timm:$src3))>,
               Sched<[sched]>;

    // Second source is a scalar in memory.
    defm rmi : AVX512_maskable_scalar<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (OpNode (_.VT _.RC:$src1),
                         (_.ScalarIntMemFrags addr:$src2),
                         (i32 timm:$src3))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10908
// Handles the instruction form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Only a register-source variant (rrib) is defined; it carries EVEX_B.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode,
                                    X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    defm rrib : AVX512_maskable<
                  opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                  OpcodeStr,
                  "$src3, {sae}, $src2, $src1",
                  "$src1, $src2, {sae}, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (i32 timm:$src3))>,
                EVEX_B, Sched<[sched]>;
  }
}
10923
// Handles the scalar instruction form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Only a register-source variant (rrib) is defined; it carries EVEX_B.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode,
                                    X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    defm NAME#rrib : AVX512_maskable_scalar<
                       opc, MRMSrcReg, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                       OpcodeStr,
                       "$src3, {sae}, $src2, $src1",
                       "$src1, $src2, {sae}, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
                               (i32 timm:$src3))>,
                     EVEX_B, Sched<[sched]>;
  }
}
10937
// Per-width instantiation of the two-source packed FP-with-immediate forms.
// The 512-bit variant (Z) additionally gets the {sae} form; the 128-bit and
// 256-bit variants require HasVLX and have no {sae} form.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                  _.info512>,
             avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM,
                                      _.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                     _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                     _.info256>,
                EVEX_V256;
  }
}
10954
// Per-width instantiation of avx512_3Op_rm_imm8 with separate destination and
// source type info. The 512-bit variant requires Pred (HasBWI by default);
// the 128-bit and 256-bit variants additionally require HasVLX.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM,
                                DestInfo.info512, SrcInfo.info512>,
             EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM,
                                   DestInfo.info128, SrcInfo.info128>,
                EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM,
                                   DestInfo.info256, SrcInfo.info256>,
                EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
10969
// Per-width instantiation of avx512_3Op_imm8. The 512-bit variant requires
// Pred (HasAVX512 by default); the 128-bit and 256-bit variants additionally
// require HasVLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM,
                                _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM,
                                _.info256>,
                EVEX_V256;
  }
}
10984
// Instantiates the scalar FP-with-immediate forms (register, memory and
// {sae}) as a single "Z" variant under predicate prd, using the XMM
// scheduling class.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched,
                  Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM,
                                      _>;
  }
}
10993
// Instantiates the unary packed FP-with-immediate forms for the f16 (PH),
// f32 (PS) and f64 (PD) element types. PH and PS both use opcPs; PD uses
// opcPd. PH is always predicated on HasFP16, while PS and PD use prd.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd> {
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,
                                                  avx512vl_f16_info, opcPs,
                                                  OpNode, MaskOpNode,
                                                  OpNodeSAE, sched, HasFP16>,
            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;

  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,
                                                  avx512vl_f32_info, opcPs,
                                                  OpNode, MaskOpNode,
                                                  OpNodeSAE, sched, prd>,
            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;

  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,
                                                  avx512vl_f64_info, opcPd,
                                                  OpNode, MaskOpNode,
                                                  OpNodeSAE, sched, prd>,
            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}
11008
// Unary packed FP-with-immediate instantiations (PH/PS/PD variants each).
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<
                 "vreduce", 0x56, 0x56,
                 X86VReduce, X86VReduce, X86VReduceSAE,
                 SchedWriteFRnd, HasDQI>;

defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<
                   "vrndscale", 0x08, 0x09,
                   X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                   SchedWriteFRnd, HasAVX512>;

defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<
                  "vgetmant", 0x26, 0x26,
                  X86VGetMant, X86VGetMant, X86VGetMantSAE,
                  SchedWriteFRnd, HasAVX512>;
11018
// VRANGE: packed forms use opcode 0x50, scalar forms use opcode 0x51.
// All are predicated on HasDQI.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<
                  "vrangepd", avx512vl_f64_info,
                  0x50, X86VRange, X86VRangeSAE,
                  SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<
                  "vrangeps", avx512vl_f32_info,
                  0x50, X86VRange, X86VRangeSAE,
                  SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD : avx512_common_fp_sae_scalar_imm<
                  "vrangesd", f64x_info,
                  0x51, X86Ranges, X86RangesSAE,
                  SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS : avx512_common_fp_sae_scalar_imm<
                  "vrangess", f32x_info,
                  0x51, X86Ranges, X86RangesSAE,
                  SchedWriteFAdd, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11034
// Scalar VREDUCE (opcode 0x57). The SD/SS forms require HasDQI; the SH form
// requires HasFP16 and uses AVX512PSIi8Base, TA.
defm VREDUCESD : avx512_common_fp_sae_scalar_imm<
                   "vreducesd", f64x_info,
                   0x57, X86Reduces, X86ReducesSAE,
                   SchedWriteFRnd, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS : avx512_common_fp_sae_scalar_imm<
                   "vreducess", f32x_info,
                   0x57, X86Reduces, X86ReducesSAE,
                   SchedWriteFRnd, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH : avx512_common_fp_sae_scalar_imm<
                   "vreducesh", f16x_info,
                   0x57, X86Reduces, X86ReducesSAE,
                   SchedWriteFRnd, HasFP16>,
                 AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

// Scalar VGETMANT (opcode 0x27). The SD/SS forms require HasAVX512; the SH
// form requires HasFP16 and uses AVX512PSIi8Base, TA.
defm VGETMANTSD : avx512_common_fp_sae_scalar_imm<
                    "vgetmantsd", f64x_info,
                    0x27, X86GetMants, X86GetMantsSAE,
                    SchedWriteFRnd, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS : avx512_common_fp_sae_scalar_imm<
                    "vgetmantss", f32x_info,
                    0x27, X86GetMants, X86GetMantsSAE,
                    SchedWriteFRnd, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH : avx512_common_fp_sae_scalar_imm<
                    "vgetmantsh", f16x_info,
                    0x27, X86GetMants, X86GetMantsSAE,
                    SchedWriteFRnd, HasFP16>,
                  AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11054
// 128-bit-lane shuffle (X86Shuf128) with an 8-bit immediate, for one vector
// width. The shuffle node is typed with CastInfo.VT and bitconverted to _.VT.
// The rri and rmi forms carry an EVEX2VEXOverride built from EVEX2VEXOvrd
// plus "rr"/"rm"; the broadcast form (rmbi) has no such override.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
    // Second source in a register.
    defm rri : AVX512_maskable<
                 opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (_.VT (bitconvert
                        (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                 (i8 timm:$src3)))))>,
               Sched<[sched]>,
               EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;

    // Second source loaded as a full vector from memory.
    defm rmi : AVX512_maskable<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1",
                 "$src1, $src2, $src3",
                 (_.VT (bitconvert
                        (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                                 (CastInfo.LdFrag addr:$src2),
                                                 (i8 timm:$src3)))))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>,
               EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

    // Second source broadcast from a scalar in memory (EVEX_B).
    defm rmbi : AVX512_maskable<
                  opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr,
                  "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT
                          (X86Shuf128 _.RC:$src1,
                                      (_.BroadcastLdFrag addr:$src2),
                                      (i8 timm:$src3)))))>,
                EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11091
// Instantiates the 128-bit-lane shuffles for the 512-bit (Z) and 256-bit
// (Z256, requires HasVLX) widths. No 128-bit variant is defined here.
// The 512-bit variant passes an empty override name; only the 256-bit variant
// forwards EVEX2VEXOvrd.
multiclass avx512_shuff_packed_128<string OpcodeStr,
                                   X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                            _.info512, CastInfo.info512, "">,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                               _.info256, CastInfo.info256,
                                               EVEX2VEXOvrd>,
                EVEX_V256;
  }
}
11105
// 128-bit-lane shuffle instantiations. The FP forms use opcode 0x23 with the
// "VPERM2F128" override name; the integer forms use opcode 0x43 with
// "VPERM2I128". The 32-bit element forms are pattern-typed via the matching
// 64-bit element info.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
                                          avx512vl_f32_info,
                                          avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
                                          avx512vl_f64_info,
                                          avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
                                          avx512vl_i32_info,
                                          avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
                                          avx512vl_i64_info,
                                          avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11114
// VALIGN with an 8-bit immediate for one vector type: register (rri),
// full-vector memory (rmi) and broadcast memory (rmbi) forms, all selecting
// X86VAlign. Every pattern states its result type explicitly as _.VT so the
// three forms are written uniformly.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  // Second source in a register.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  // Second source loaded as a full vector from memory.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  // Second source broadcast from a scalar in memory (EVEX_B). No
  // EVEX2VEXOverride is attached to this form.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (_.VT (X86VAlign _.RC:$src1,
                                    (_.VT (_.BroadcastLdFrag addr:$src2)),
                                    (i8 timm:$src3)))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
11144
// Instantiates VALIGN (opcode 0x03) for the 512-bit width under HasAVX512 and
// for the 128-bit/256-bit widths under [HasAVX512, HasVLX].
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;

    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in {
      defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                  AVX512AIi8Base, EVEX_4V, EVEX_V256;
    }
  }
}
11160
// VALIGND/VALIGNQ instantiations; the 64-bit element form adds VEX_W.
defm VALIGND : avx512_valign_common<"valignd", SchedWriteShuffle,
                                    avx512vl_i32_info>,
               EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign_common<"valignq", SchedWriteShuffle,
                                    avx512vl_i64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;

// VPALIGNR (opcode 0x0F) on byte vectors; the multiclass default predicate
// (HasBWI) applies since none is passed here.
defm VPALIGNR : avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                          SchedWriteShuffle,
                                          avx512vl_i8_info,
                                          avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
11170
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr. Each transform rescales the target immediate by a constant
// factor and returns it as an i8 immediate.

// Multiplies the immediate by 2.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;

// Multiplies the immediate by 8.
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;

// Multiplies the immediate by 4.
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
11182
// Patterns that select a masked instruction of type To for a vselect_mask
// whose selected value is a bitconverted OpNode of type From. The immediate
// is rewritten with ImmXForm. Four forms are covered: register or memory
// second source, each with a register pass-through ("k") or an all-zeros
// pass-through ("kz").
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From,
                                        X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // Register source, merge with $src0.
  def : Pat<
    (To.VT (vselect_mask To.KRCWM:$mask,
                         (bitconvert
                          (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                           timm:$src3))),
                         To.RC:$src0)),
    (!cast<Instruction>(OpcodeStr#"rrik")
       To.RC:$src0, To.KRCWM:$mask,
       To.RC:$src1, To.RC:$src2,
       (ImmXForm timm:$src3))
  >;

  // Register source, zero masking.
  def : Pat<
    (To.VT (vselect_mask To.KRCWM:$mask,
                         (bitconvert
                          (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                           timm:$src3))),
                         To.ImmAllZerosV)),
    (!cast<Instruction>(OpcodeStr#"rrikz")
       To.KRCWM:$mask,
       To.RC:$src1, To.RC:$src2,
       (ImmXForm timm:$src3))
  >;

  // Memory source, merge with $src0.
  def : Pat<
    (To.VT (vselect_mask To.KRCWM:$mask,
                         (bitconvert
                          (From.VT (OpNode From.RC:$src1,
                                           (From.LdFrag addr:$src2),
                                           timm:$src3))),
                         To.RC:$src0)),
    (!cast<Instruction>(OpcodeStr#"rmik")
       To.RC:$src0, To.KRCWM:$mask,
       To.RC:$src1, addr:$src2,
       (ImmXForm timm:$src3))
  >;

  // Memory source, zero masking.
  def : Pat<
    (To.VT (vselect_mask To.KRCWM:$mask,
                         (bitconvert
                          (From.VT (OpNode From.RC:$src1,
                                           (From.LdFrag addr:$src2),
                                           timm:$src3))),
                         To.ImmAllZerosV)),
    (!cast<Instruction>(OpcodeStr#"rmikz")
       To.KRCWM:$mask,
       To.RC:$src1, addr:$src2,
       (ImmXForm timm:$src3))
  >;
}
11224
// Inherits the four patterns of avx512_vpalign_mask_lowering and adds three
// more for a second source that is a To.BroadcastLdFrag load bitcast to
// From.VT:
//   rmbi   - unmasked; the matched node stays in From.VT
//   rmbik  - result bitcast to To.VT, masked-off lanes taken from $src0
//   rmbikz - result bitcast to To.VT, masked-off lanes are To.ImmAllZerosV
11225multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11226                                           X86VectorVTInfo From,
11227                                           X86VectorVTInfo To,
11228                                           SDNodeXForm ImmXForm> :
11229      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11230  def : Pat<(From.VT (OpNode From.RC:$src1,
11231                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11232                             timm:$src3)),
11233            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11234                                                  (ImmXForm timm:$src3))>;
11235
11236  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11237                                 (bitconvert
11238                                  (From.VT (OpNode From.RC:$src1,
11239                                           (bitconvert
11240                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11241                                           timm:$src3))),
11242                                 To.RC:$src0)),
11243            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11244                                                   To.RC:$src1, addr:$src2,
11245                                                   (ImmXForm timm:$src3))>;
11246
11247  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11248                                 (bitconvert
11249                                  (From.VT (OpNode From.RC:$src1,
11250                                           (bitconvert
11251                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11252                                           timm:$src3))),
11253                                 To.ImmAllZerosV)),
11254            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11255                                                    To.RC:$src1, addr:$src2,
11256                                                    (ImmXForm timm:$src3))>;
11257}
11258
// 512-bit: select the VALIGNDZ forms for an X86VAlign built as v8i64 and
// consumed as v16i32. The immediate is rewritten by ValignqImm32XForm, which
// is defined earlier in this file.
11259let Predicates = [HasAVX512] in {
11260  // For 512-bit we lower to the widest element type we can. So we only need
11261  // to handle converting valignq to valignd.
11262  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11263                                         v16i32_info, ValignqImm32XForm>;
11264}
11265
// 128-bit and 256-bit equivalents of the 512-bit valignq -> valignd patterns,
// available only with HasVLX.
11266let Predicates = [HasVLX] in {
11267  // For 128-bit we lower to the widest element type we can. So we only need
11268  // to handle converting valignq to valignd.
11269  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11270                                         v4i32x_info, ValignqImm32XForm>;
11271  // For 256-bit we lower to the widest element type we can. So we only need
11272  // to handle converting valignq to valignd.
11273  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11274                                         v8i32x_info, ValignqImm32XForm>;
11275}
11276
// Select byte-masked VPALIGNR for an X86VAlign built with qword or dword
// elements. The immediate is scaled by ValignqImm8XForm / ValigndImm8XForm.
// NOTE: only the 128-bit instruction (VPALIGNRZ128) is instantiated below;
// no 256-bit patterns are defined in this block.
11277let Predicates = [HasVLX, HasBWI] in {
11278  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
11279  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11280                                      v16i8x_info, ValignqImm8XForm>;
11281  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11282                                      v16i8x_info, ValigndImm8XForm>;
11283}
11284
// VDBPSADBW (opcode 0x42): three-operand instruction with an imm8, matched
// against X86dbpsadbw. It is instantiated with both the i16 and i8 vector
// type infos -- presumably destination and source respectively; confirm
// against avx512_common_3Op_rm_imm8.
11285defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11286                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11287                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11288
// One-source vector instruction for a single vector type `_`, built through
// AVX512_maskable:
//   rr - source in a register
//   rm - source is a full-vector load (_.LdFrag) from memory
// Both forms match `OpNode` applied to the source and use the execution
// domain of `_`. The memory form uses the folded scheduling class.
11289multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11290                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11291  let ExeDomain = _.ExeDomain in {
11292  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11293                    (ins _.RC:$src1), OpcodeStr,
11294                    "$src1", "$src1",
11295                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11296                    Sched<[sched]>;
11297
11298  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11299                  (ins _.MemOp:$src1), OpcodeStr,
11300                  "$src1", "$src1",
11301                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11302            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11303            Sched<[sched.Folded]>;
11304  }
11305}
11306
// avx512_unary_rm plus an `rmb` form whose source is a scalar memory operand
// matched as a broadcast load (_.BroadcastLdFrag). The assembly operand is
// suffixed with _.BroadcastStr and the encoding sets EVEX_B.
11307multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11308                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11309           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11310  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11311                  (ins _.ScalarMemOp:$src1), OpcodeStr,
11312                  "${src1}"#_.BroadcastStr,
11313                  "${src1}"#_.BroadcastStr,
11314                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11315             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11316             Sched<[sched.Folded]>;
11317}
11318
// Instantiates avx512_unary_rm at all three vector lengths: Z (512-bit) under
// predicate `prd`, and Z256 / Z128 under `prd` plus HasVLX. Each length uses
// the matching ZMM / YMM / XMM scheduling class from `sched`.
11319multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11320                              X86SchedWriteWidths sched,
11321                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11322  let Predicates = [prd] in
11323    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11324                             EVEX_V512;
11325
11326  let Predicates = [prd, HasVLX] in {
11327    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11328                              EVEX_V256;
11329    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11330                              EVEX_V128;
11331  }
11332}
11333
// Same as avx512_unary_rm_vl, but instantiates avx512_unary_rmb so that each
// vector length also gets the broadcast-memory (rmb) form.
11334multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11335                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11336                               Predicate prd> {
11337  let Predicates = [prd] in
11338    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11339                              EVEX_V512;
11340
11341  let Predicates = [prd, HasVLX] in {
11342    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11343                                 EVEX_V256;
11344    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11345                                 EVEX_V128;
11346  }
11347}
11348
// Dword/qword pair of a unary instruction: `Q` (mnemonic suffix "q", i64
// elements, VEX_W, opcode opc_q) and `D` (suffix "d", i32 elements, opcode
// opc_d). Both include broadcast-memory forms.
11349multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11350                                 SDNode OpNode, X86SchedWriteWidths sched,
11351                                 Predicate prd> {
11352  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11353                               avx512vl_i64_info, prd>, VEX_W;
11354  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11355                               avx512vl_i32_info, prd>;
11356}
11357
// Byte/word pair of a unary instruction: `W` (suffix "w", i16 elements,
// opcode opc_w) and `B` (suffix "b", i8 elements, opcode opc_b), both
// VEX_WIG. These use avx512_unary_rm_vl, so no broadcast-memory form is
// defined for them.
11358multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11359                                 SDNode OpNode, X86SchedWriteWidths sched,
11360                                 Predicate prd> {
11361  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11362                              avx512vl_i16_info, prd>, VEX_WIG;
11363  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11364                              avx512vl_i8_info, prd>, VEX_WIG;
11365}
11366
// All four element widths of a unary instruction: dword/qword forms under
// HasAVX512 and byte/word forms under HasBWI.
11367multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11368                                  bits<8> opc_d, bits<8> opc_q,
11369                                  string OpcodeStr, SDNode OpNode,
11370                                  X86SchedWriteWidths sched> {
11371  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11372                                    HasAVX512>,
11373              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11374                                    HasBWI>;
11375}
11376
// VPABSB/W/D/Q, matched against the generic `abs` node. Opcodes in
// b, w, d, q order are 0x1C, 0x1D, 0x1E, 0x1F.
11377defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11378                                    SchedWriteVecALU>;
11379
11380// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted into an undefined v8i64 register, VPABSQZrr
// runs on the full 512 bits, and the original-width subregister is extracted.
// Only the qword element type (v4i64 / v2i64) is handled here.
11381let Predicates = [HasAVX512, NoVLX] in {
11382  def : Pat<(v4i64 (abs VR256X:$src)),
11383            (EXTRACT_SUBREG
11384                (VPABSQZrr
11385                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11386             sub_ymm)>;
11387  def : Pat<(v2i64 (abs VR128X:$src)),
11388            (EXTRACT_SUBREG
11389                (VPABSQZrr
11390                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11391             sub_xmm)>;
11392}
11393
11394// Use 512bit version to implement 128/256 bit.
// Under `prd` and NoVLX, a 256-bit or 128-bit OpNode is selected as the
// 512-bit register instruction InstrStr # "Zrr": the source is inserted into
// an undefined 512-bit register at the type's SubRegIdx, and the same
// subregister is extracted from the result.
11395multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11396                                 AVX512VLVectorVTInfo _, Predicate prd> {
11397  let Predicates = [prd, NoVLX] in {
11398    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11399              (EXTRACT_SUBREG
11400                (!cast<Instruction>(InstrStr # "Zrr")
11401                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11402                                 _.info256.RC:$src1,
11403                                 _.info256.SubRegIdx)),
11404              _.info256.SubRegIdx)>;
11405
11406    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11407              (EXTRACT_SUBREG
11408                (!cast<Instruction>(InstrStr # "Zrr")
11409                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11410                                 _.info128.RC:$src1,
11411                                 _.info128.SubRegIdx)),
11412              _.info128.SubRegIdx)>;
11413  }
11414}
11415
// VPLZCNTD/Q: matched against `ctlz`, gated on HasCDI. The D and Q forms
// share opcode 0x44; the Q form is distinguished by VEX_W, which
// avx512_unary_rm_vl_dq applies.
11416defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11417                                        SchedWriteVecIMul, HasCDI>;
11418
11419// FIXME: Is there a better scheduler class for VPCONFLICT?
// VPCONFLICTD/Q: matched against X86Conflict, gated on HasCDI, opcode 0xC4.
11420defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11421                                        SchedWriteVecALU, HasCDI>;
11422
11423// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11424defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11425defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11426
11427//===---------------------------------------------------------------------===//
11428// Counts number of ones - VPOPCNTD and VPOPCNTQ
11429//===---------------------------------------------------------------------===//
11430
11431// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// VPOPCNTD/Q: matched against `ctpop`, gated on HasVPOPCNTDQ, opcode 0x55.
11432defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11433                                     SchedWriteVecALU, HasVPOPCNTDQ>;
11434
// Without VLX, implement the 128/256-bit forms with the 512-bit instruction.
11435defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11436defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11437
11438//===---------------------------------------------------------------------===//
11439// Replicate Single FP - MOVSHDUP and MOVSLDUP
11440//===---------------------------------------------------------------------===//
11441
// Unary f32-vector instruction at all three vector lengths under HasAVX512,
// encoded with the XS prefix. There is no broadcast-memory form.
11442multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11443                            X86SchedWriteWidths sched> {
11444  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11445                                      avx512vl_f32_info, HasAVX512>, XS;
11446}
11447
// VMOVSHDUP (opcode 0x16, node X86Movshdup) and VMOVSLDUP (opcode 0x12, node
// X86Movsldup), both scheduled as FP shuffles.
11448defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11449                                  SchedWriteFShuffle>;
11450defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11451                                  SchedWriteFShuffle>;
11452
11453//===----------------------------------------------------------------------===//
11454// AVX-512 - MOVDDUP
11455//===----------------------------------------------------------------------===//
11456
// 128-bit MOVDDUP. Unlike the wider forms, it is matched as a broadcast:
//   rr - X86VBroadcast of a vector register
//   rm - broadcast load (_.BroadcastLdFrag) from a scalar memory operand,
//        with the CD8VH displacement scaling
11457multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11458                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11459  let ExeDomain = _.ExeDomain in {
11460  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11461                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
11462                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11463                   Sched<[sched]>;
11464  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11465                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11466                 (_.VT (_.BroadcastLdFrag addr:$src))>,
11467                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11468                 Sched<[sched.Folded]>;
11469  }
11470}
11471
// MOVDDUP at all three vector lengths. The 512-bit and 256-bit forms match
// X86Movddup through avx512_unary_rm; the 128-bit form uses the
// broadcast-based avx512_movddup_128. The 256/128-bit forms require HasVLX.
// The Z form sets no Predicates here, so it takes whatever is in effect at
// the instantiation site.
11472multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11473                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11474  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11475                           VTInfo.info512>, EVEX_V512;
11476
11477  let Predicates = [HasAVX512, HasVLX] in {
11478    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11479                                VTInfo.info256>, EVEX_V256;
11480    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11481                                   VTInfo.info128>, EVEX_V128;
11482  }
11483}
11484
// Fixes the element type to f64 and adds the XD prefix and VEX_W.
11485multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11486                          X86SchedWriteWidths sched> {
11487  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11488                                        avx512vl_f64_info>, XD, VEX_W;
11489}
11490
// VMOVDDUP: opcode 0x12, scheduled as an FP shuffle.
11491defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11492
// With VLX, select VMOVDDUPZ128 for a v2f64 broadcast of a scalar f64 held in
// a register. The scalar is first copied from FR64X into VR128X. Three
// variants are matched: unmasked (rr), masked with pass-through $src0 (rrk),
// and masked with zeroing (rrkz).
11493let Predicates = [HasVLX] in {
11494def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11495          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11496
11497def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11498                        (v2f64 VR128X:$src0)),
11499          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11500                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11501def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11502                        immAllZerosV),
11503          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11504}
11505
11506//===----------------------------------------------------------------------===//
11507// AVX-512 - Unpack Instructions
11508//===----------------------------------------------------------------------===//
11509
// Floating-point unpack high/low (opcodes 0x15 / 0x14). The surrounding `let`
// clears the Uses list and sets mayRaiseFPException to 0 for these
// definitions. VUNPCKH passes two extra trailing arguments (0, 1) that
// VUNPCKL leaves at their defaults -- see avx512_fp_binop_p for their
// meaning.
11510let Uses = []<Register>, mayRaiseFPException = 0 in {
11511defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11512                                 SchedWriteFShuffleSizes, 0, 1>;
11513defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11514                                 SchedWriteFShuffleSizes>;
11515}
11516
// Integer unpack low/high for byte and word elements. These require HasBWI.
11517defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11518                                       SchedWriteShuffle, HasBWI>;
11519defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11520                                       SchedWriteShuffle, HasBWI>;
11521defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11522                                       SchedWriteShuffle, HasBWI>;
11523defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11524                                       SchedWriteShuffle, HasBWI>;
11525
// Integer unpack low/high for dword and qword elements. These only require
// HasAVX512.
11526defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11527                                       SchedWriteShuffle, HasAVX512>;
11528defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11529                                       SchedWriteShuffle, HasAVX512>;
11530defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11531                                        SchedWriteShuffle, HasAVX512>;
11532defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11533                                        SchedWriteShuffle, HasAVX512>;
11534
11535//===----------------------------------------------------------------------===//
11536// AVX-512 - Extract & Insert Integer Instructions
11537//===----------------------------------------------------------------------===//
11538
// Store form (`mr`) shared by the byte and word element extracts. It matches
// a store of the OpNode result truncated to the vector's element type.
11539multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11540                                                            X86VectorVTInfo _> {
11541  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11542              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11543              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11544              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11545                       addr:$dst)]>,
11546              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11547}
11548
// Byte element extract (opcode 0x14, requires HasBWI): `rr` writes a
// GR32orGR64 register via X86pextrb; the store form comes from
// avx512_extract_elt_bw_m.
11549multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11550  let Predicates = [HasBWI] in {
11551    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11552                  (ins _.RC:$src1, u8imm:$src2),
11553                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11554                  [(set GR32orGR64:$dst,
11555                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11556                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11557
11558    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11559  }
11560}
11561
// Word element extract (requires HasBWI):
//   rr     - opcode 0xC5, MRMSrcReg, PD map; carries the X86pextrw pattern
//   rr_REV - opcode 0x15, MRMDestReg, TAPD map; same operands but no
//            selection pattern, marked isCodeGenOnly and ForceDisassemble,
//            and tied to `rr` through FoldGenData
//   mr     - store form (opcode 0x15) from avx512_extract_elt_bw_m
11562multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11563  let Predicates = [HasBWI] in {
11564    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11565                  (ins _.RC:$src1, u8imm:$src2),
11566                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11567                  [(set GR32orGR64:$dst,
11568                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11569                  EVEX, PD, Sched<[WriteVecExtract]>;
11570
11571    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11572    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11573                   (ins _.RC:$src1, u8imm:$src2),
11574                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11575                   EVEX, TAPD, FoldGenData<NAME#rr>,
11576                   Sched<[WriteVecExtract]>;
11577
11578    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11579  }
11580}
11581
// Dword/qword element extract (opcode 0x16, requires HasDQI), matched against
// the generic `extractelt` node with an `imm` index. `rr` writes a register
// of class GRC; `mr` stores the element to memory.
11582multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11583                                                            RegisterClass GRC> {
11584  let Predicates = [HasDQI] in {
11585    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11586                  (ins _.RC:$src1, u8imm:$src2),
11587                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11588                  [(set GRC:$dst,
11589                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11590                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11591
11592    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11593                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11594                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11595                [(store (extractelt (_.VT _.RC:$src1),
11596                                    imm:$src2),addr:$dst)]>,
11597                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11598                Sched<[WriteVecExtractSt]>;
11599  }
11600}
11601
// EVEX element extracts from 128-bit vectors. The byte and word forms are
// VEX_WIG; the qword form sets VEX_W and targets GR64.
11602defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
11603defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
11604defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11605defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
11606
// Memory-source form (`rm`) of an element insert: the inserted scalar is
// loaded with LdFrag, and the index operand is matched with `immoperator`
// (callers in this file pass either timm or imm).
11607multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11608                                            X86VectorVTInfo _, PatFrag LdFrag,
11609                                            SDPatternOperator immoperator> {
11610  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11611      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11612      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11613      [(set _.RC:$dst,
11614          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11615      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11616}
11617
// Byte/word element insert (requires HasBWI): `rr` takes the scalar from a
// GR32orGR64 register and matches the index as a timm; the memory form comes
// from avx512_insert_elt_m with the supplied LdFrag.
11618multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11619                                            X86VectorVTInfo _, PatFrag LdFrag> {
11620  let Predicates = [HasBWI] in {
11621    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11622        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11623        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11624        [(set _.RC:$dst,
11625            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11626        Sched<[WriteVecInsert]>;
11627
11628    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11629  }
11630}
11631
// Dword/qword element insert (requires HasDQI), matched against the generic
// `insertelt` node with an `imm` index. `rr` takes the scalar from register
// class GRC; the memory form loads it with _.ScalarLdFrag.
11632multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11633                                         X86VectorVTInfo _, RegisterClass GRC> {
11634  let Predicates = [HasDQI] in {
11635    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11636        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11637        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11638        [(set _.RC:$dst,
11639            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11640        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11641
11642    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11643                                    _.ScalarLdFrag, imm>, TAPD;
11644  }
11645}
11646
// EVEX element inserts into 128-bit vectors. The byte and word forms use
// extending loads for their memory variants and are VEX_WIG. The dword and
// qword forms share opcode 0x22, with VEX_W selecting the qword form.
11647defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11648                                     extloadi8>, TAPD, VEX_WIG;
11649defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11650                                     extloadi16>, PD, VEX_WIG;
11651defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11652defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11653
11654//===----------------------------------------------------------------------===//
11655// VSHUFPS - VSHUFPD Operations
11656//===----------------------------------------------------------------------===//
11657
// VSHUFPS/VSHUFPD wrapper: a three-operand imm8 instruction with opcode 0xC6,
// matched against X86Shufp. The CD8 element size is taken from the 512-bit
// type info.
11658multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11659  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11660                                    SchedWriteFShuffle>,
11661                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11662                                    AVX512AIi8Base, EVEX_4V;
11663}
11664
// Single-precision form uses the PS prefix; double-precision form uses PD
// and VEX_W.
11665defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
11666defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
11667
11668//===----------------------------------------------------------------------===//
11669// AVX-512 - Byte shift Left/Right
11670//===----------------------------------------------------------------------===//
11671
// Whole-register byte shift by an immediate, without masking:
//   ri - source in a register (ModRM form MRMr)
//   mi - source loaded from memory with _.LdFrag (ModRM form MRMm)
// The shift amount is matched as an i8 timm.
11672multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11673                               Format MRMm, string OpcodeStr,
11674                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11675  def ri : AVX512<opc, MRMr,
11676             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11677             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11678             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11679             Sched<[sched]>;
11680  def mi : AVX512<opc, MRMm,
11681           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11682           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11683           [(set _.RC:$dst,(_.VT (OpNode
11684                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11685                                 (i8 timm:$src2))))]>,
11686           Sched<[sched.Folded, sched.ReadAfterFold]>;
11687}
11688
// Instantiates avx512_shift_packed on byte vectors at 512 bits (predicate
// `prd`) and at 256/128 bits (`prd` plus HasVLX).
11689multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11690                                   Format MRMm, string OpcodeStr,
11691                                   X86SchedWriteWidths sched, Predicate prd>{
11692  let Predicates = [prd] in
11693    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11694                                 sched.ZMM, v64i8_info>, EVEX_V512;
11695  let Predicates = [prd, HasVLX] in {
11696    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11697                                    sched.YMM, v32i8x_info>, EVEX_V256;
11698    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11699                                    sched.XMM, v16i8x_info>, EVEX_V128;
11700  }
11701}
11701}
// VPSLLDQ and VPSRLDQ share opcode 0x73 and are distinguished by the ModRM
// reg field: /7 (MRM7r / MRM7m) for the left shift and /3 (MRM3r / MRM3m)
// for the right shift. Both require HasBWI.
11702defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11703                                       SchedWriteShuffle, HasBWI>,
11704                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11705defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11706                                       SchedWriteShuffle, HasBWI>,
11707                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11708
// Two-source instruction whose result type (_dst) differs from its source
// type (_src), without masking:
//   rr - both sources in registers; marked commutable
//   rm - second source loaded from memory with _src.LdFrag
11709multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11710                                string OpcodeStr, X86FoldableSchedWrite sched,
11711                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11712  let isCommutable = 1 in
11713  def rr : AVX512BI<opc, MRMSrcReg,
11714             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11715             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11716             [(set _dst.RC:$dst,(_dst.VT
11717                                (OpNode (_src.VT _src.RC:$src1),
11718                                        (_src.VT _src.RC:$src2))))]>,
11719             Sched<[sched]>;
11720  def rm : AVX512BI<opc, MRMSrcMem,
11721           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11722           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11723           [(set _dst.RC:$dst,(_dst.VT
11724                              (OpNode (_src.VT _src.RC:$src1),
11725                              (_src.VT (bitconvert
11726                                        (_src.LdFrag addr:$src2))))))]>,
11727           Sched<[sched.Folded, sched.ReadAfterFold]>;
11728}
11729
// Instantiates avx512_psadbw_packed with i8 sources and i64 results at 512
// bits (predicate `prd`) and at 256/128 bits (`prd` plus HasVLX).
11730multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11731                                    string OpcodeStr, X86SchedWriteWidths sched,
11732                                    Predicate prd> {
11733  let Predicates = [prd] in
11734    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11735                                  v8i64_info, v64i8_info>, EVEX_V512;
11736  let Predicates = [prd, HasVLX] in {
11737    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11738                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11739    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11740                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11741  }
11742}
11743
// VPSADBW: opcode 0xf6, matched against X86psadbw, requires HasBWI.
11744defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11745                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11746
11747// Transforms to swizzle an immediate to enable better matching when
11748// memory operand isn't in the right place.
// Truth-table immediate for VPTERNLOG with the first and last operands
// exchanged. Bits 0, 2, 5 and 7 (mask 0xa5) stay in place; the remaining four
// bits are exchanged pairwise.
11749def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11750  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11751  uint8_t Imm = N->getZExtValue();
11752  // Swap bits 1/4 and 3/6.
11753  uint8_t NewImm = Imm & 0xa5;
11754  if (Imm & 0x02) NewImm |= 0x10;
11755  if (Imm & 0x10) NewImm |= 0x02;
11756  if (Imm & 0x08) NewImm |= 0x40;
11757  if (Imm & 0x40) NewImm |= 0x08;
11758  return getI8Imm(NewImm, SDLoc(N));
11759}]>;
// Truth-table immediate for VPTERNLOG with the first two operands exchanged.
// Bits 0, 1, 6 and 7 (mask 0xc3) stay in place; the remaining four bits are
// exchanged pairwise.
11760def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11761  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1
  // (0-based, matching the numbering used for VPTERNLOG321_imm8).
11762  uint8_t Imm = N->getZExtValue();
11763  // Swap bits 2/4 and 3/5.
11764  uint8_t NewImm = Imm & 0xc3;
11765  if (Imm & 0x04) NewImm |= 0x10;
11766  if (Imm & 0x10) NewImm |= 0x04;
11767  if (Imm & 0x08) NewImm |= 0x20;
11768  if (Imm & 0x20) NewImm |= 0x08;
11769  return getI8Imm(NewImm, SDLoc(N));
11770}]>;
// Truth-table immediate for VPTERNLOG with the last two operands exchanged.
// Bits 0, 3, 4 and 7 (mask 0x99) stay in place; the remaining four bits are
// exchanged pairwise.
11771def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11772  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11773  uint8_t Imm = N->getZExtValue();
11774  // Swap bits 1/2 and 5/6.
11775  uint8_t NewImm = Imm & 0x99;
11776  if (Imm & 0x02) NewImm |= 0x04;
11777  if (Imm & 0x04) NewImm |= 0x02;
11778  if (Imm & 0x20) NewImm |= 0x40;
11779  if (Imm & 0x40) NewImm |= 0x20;
11780  return getI8Imm(NewImm, SDLoc(N));
11781}]>;
// Truth-table immediate for VPTERNLOG with the operands rotated so that the
// first operand becomes the last. Bits 0 and 7 (mask 0x81) stay in place; the
// other six bits are permuted as listed below.
11782def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11783  // Convert a VPTERNLOG immediate by moving operand 0 (the first operand) to
  // the end.
11784  uint8_t Imm = N->getZExtValue();
11785  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11786  uint8_t NewImm = Imm & 0x81;
11787  if (Imm & 0x02) NewImm |= 0x04;
11788  if (Imm & 0x04) NewImm |= 0x10;
11789  if (Imm & 0x08) NewImm |= 0x40;
11790  if (Imm & 0x10) NewImm |= 0x02;
11791  if (Imm & 0x20) NewImm |= 0x08;
11792  if (Imm & 0x40) NewImm |= 0x20;
11793  return getI8Imm(NewImm, SDLoc(N));
11794}]>;
// Truth-table immediate for VPTERNLOG with the operands rotated so that the
// last operand becomes the first. Bits 0 and 7 (mask 0x81) stay in place; the
// other six bits are permuted as listed below. This is the inverse of the
// VPTERNLOG231_imm8 permutation.
11795def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11796  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11797  uint8_t Imm = N->getZExtValue();
11798  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11799  uint8_t NewImm = Imm & 0x81;
11800  if (Imm & 0x02) NewImm |= 0x10;
11801  if (Imm & 0x04) NewImm |= 0x02;
11802  if (Imm & 0x08) NewImm |= 0x20;
11803  if (Imm & 0x10) NewImm |= 0x04;
11804  if (Imm & 0x20) NewImm |= 0x40;
11805  if (Imm & 0x40) NewImm |= 0x08;
11806  return getI8Imm(NewImm, SDLoc(N));
11807}]>;
11808
// Defines the VPTERNLOG instruction forms for a single vector type and the
// extra selection patterns needed when the tied/passthru operand or the
// memory operand appears in a different position of OpNode than the
// instruction expects.
//   opc, OpcodeStr - opcode byte and mnemonic forwarded to
//                    AVX512_maskable_3src.
//   OpNode         - DAG node matched by the instruction and extra patterns.
//   sched          - scheduling info; the memory forms use sched.Folded and
//                    sched.ReadAfterFold.
//   _              - vector type info (register class, VT, memory operands,
//                    load fragments, mask register class).
//   Name           - prefix used to look up the generated instruction
//                    records (Name # _.ZSuffix # rrik and friends).
11809multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11810                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11811                          string Name>{
11812  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // Register form: all three sources are vector registers.
11813  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11814                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11815                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11816                      (OpNode (_.VT _.RC:$src1),
11817                              (_.VT _.RC:$src2),
11818                              (_.VT _.RC:$src3),
11819                              (i8 timm:$src4)), 1, 1>,
11820                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  // Memory form: $src3 is a full-vector load (_.MemOp / _.LdFrag).
11821  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11822                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11823                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11824                    (OpNode (_.VT _.RC:$src1),
11825                            (_.VT _.RC:$src2),
11826                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11827                            (i8 timm:$src4)), 1, 0>,
11828                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11829                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: $src3 is a scalar memory operand matched through
  // _.BroadcastLdFrag; the encoding adds EVEX_B.
11830  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11831                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11832                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11833                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11834                    (OpNode (_.VT _.RC:$src1),
11835                            (_.VT _.RC:$src2),
11836                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11837                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11838                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11839                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11840  }// Constraints = "$src1 = $dst"
11841
  // In every pattern below the output always lists $src1 first (it is tied
  // to $dst above) followed by $src2 and $src3/addr:$src3. The
  // VPTERNLOGxyz_imm8 transform applied to $src4 compensates for the
  // difference between that order and the order OpNode was matched with.
11842  // Additional patterns for matching passthru operand in other positions.
11843  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11844                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11845                   _.RC:$src1)),
11846            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11847             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11848  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11849                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11850                   _.RC:$src1)),
11851            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11852             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11853
11854  // Additional patterns for matching zero masking with loads in other
11855  // positions.
11856  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11857                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11858                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11859                   _.ImmAllZerosV)),
11860            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11861             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11862  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11863                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11864                    _.RC:$src2, (i8 timm:$src4)),
11865                   _.ImmAllZerosV)),
11866            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11867             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11868
11869  // Additional patterns for matching masked loads with different
11870  // operand orders.
11871  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11872                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11873                    _.RC:$src2, (i8 timm:$src4)),
11874                   _.RC:$src1)),
11875            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11876             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11877  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11878                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11879                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11880                   _.RC:$src1)),
11881            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11882             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11883  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11884                   (OpNode _.RC:$src2, _.RC:$src1,
11885                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11886                   _.RC:$src1)),
11887            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11888             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11889  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11890                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11891                    _.RC:$src1, (i8 timm:$src4)),
11892                   _.RC:$src1)),
11893            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11894             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11895  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11896                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11897                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11898                   _.RC:$src1)),
11899            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11900             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11901
11902  // Additional patterns for matching zero masking with broadcasts in other
11903  // positions.
11904  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11905                   (OpNode (_.BroadcastLdFrag addr:$src3),
11906                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11907                   _.ImmAllZerosV)),
11908            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11909             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11910             (VPTERNLOG321_imm8 timm:$src4))>;
11911  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11912                   (OpNode _.RC:$src1,
11913                    (_.BroadcastLdFrag addr:$src3),
11914                    _.RC:$src2, (i8 timm:$src4)),
11915                   _.ImmAllZerosV)),
11916            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11917             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11918             (VPTERNLOG132_imm8 timm:$src4))>;
11919
11920  // Additional patterns for matching masked broadcasts with different
11921  // operand orders.
11922  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11923                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11924                    _.RC:$src2, (i8 timm:$src4)),
11925                   _.RC:$src1)),
11926            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11927             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11928  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11929                   (OpNode (_.BroadcastLdFrag addr:$src3),
11930                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11931                   _.RC:$src1)),
11932            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11933             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11934  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11935                   (OpNode _.RC:$src2, _.RC:$src1,
11936                    (_.BroadcastLdFrag addr:$src3),
11937                    (i8 timm:$src4)), _.RC:$src1)),
11938            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11939             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11940  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11941                   (OpNode _.RC:$src2,
11942                    (_.BroadcastLdFrag addr:$src3),
11943                    _.RC:$src1, (i8 timm:$src4)),
11944                   _.RC:$src1)),
11945            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11946             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11947  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11948                   (OpNode (_.BroadcastLdFrag addr:$src3),
11949                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11950                   _.RC:$src1)),
11951            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11952             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11953}
11954
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for the three
// vector widths described by `_`: the 512-bit form under HasAVX512, and the
// 128-/256-bit forms under HasAVX512 + HasVLX. NAME is forwarded so the
// inner multiclass can look up the instruction records it generates.
11955multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11956                                 AVX512VLVectorVTInfo _> {
11957  let Predicates = [HasAVX512] in
11958    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11959                               _.info512, NAME>, EVEX_V512;
11960  let Predicates = [HasAVX512, HasVLX] in {
11961    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11962                               _.info128, NAME>, EVEX_V128;
11963    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11964                               _.info256, NAME>, EVEX_V256;
11965  }
11966}
11967
// Doubleword (i32 element) and quadword (i64 element) VPTERNLOG families.
// Both use SchedWriteVecALU; the quadword family additionally sets VEX_W.
11968defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11969                                        avx512vl_i32_info>;
11970defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11971                                        avx512vl_i64_info>, VEX_W;
11972
11973// Patterns to implement vnot using vpternlog instead of creating all ones
11974// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11975// so that the result is only dependent on src0. But we use the same source
11976// for all operands to prevent a false dependency.
11977// TODO: We should maybe have a more generalized algorithm for folding to
11978// vpternlog.
// 512-bit vnot: every integer element width selects the same VPTERNLOGQZrri,
// with $src supplied for all three register operands and immediate 15.
11979let Predicates = [HasAVX512] in {
11980  def : Pat<(v64i8 (vnot VR512:$src)),
11981            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11982  def : Pat<(v32i16 (vnot VR512:$src)),
11983            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11984  def : Pat<(v16i32 (vnot VR512:$src)),
11985            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11986  def : Pat<(v8i64 (vnot VR512:$src)),
11987            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11988}
11989
// 128-/256-bit vnot when VLX is not available: the source is placed into the
// low subregister (sub_xmm / sub_ymm) of an otherwise IMPLICIT_DEF 512-bit
// value, the 512-bit VPTERNLOGQZrri is applied with immediate 15, and the
// same subregister is extracted from the result.
11990let Predicates = [HasAVX512, NoVLX] in {
  // 128-bit element types.
11991  def : Pat<(v16i8 (vnot VR128X:$src)),
11992            (EXTRACT_SUBREG
11993             (VPTERNLOGQZrri
11994              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11995              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11996              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11997              (i8 15)), sub_xmm)>;
11998  def : Pat<(v8i16 (vnot VR128X:$src)),
11999            (EXTRACT_SUBREG
12000             (VPTERNLOGQZrri
12001              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12002              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12003              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12004              (i8 15)), sub_xmm)>;
12005  def : Pat<(v4i32 (vnot VR128X:$src)),
12006            (EXTRACT_SUBREG
12007             (VPTERNLOGQZrri
12008              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12009              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12010              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12011              (i8 15)), sub_xmm)>;
12012  def : Pat<(v2i64 (vnot VR128X:$src)),
12013            (EXTRACT_SUBREG
12014             (VPTERNLOGQZrri
12015              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12016              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12017              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12018              (i8 15)), sub_xmm)>;
12019
  // 256-bit element types.
12020  def : Pat<(v32i8 (vnot VR256X:$src)),
12021            (EXTRACT_SUBREG
12022             (VPTERNLOGQZrri
12023              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12024              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12025              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12026              (i8 15)), sub_ymm)>;
12027  def : Pat<(v16i16 (vnot VR256X:$src)),
12028            (EXTRACT_SUBREG
12029             (VPTERNLOGQZrri
12030              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12031              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12032              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12033              (i8 15)), sub_ymm)>;
12034  def : Pat<(v8i32 (vnot VR256X:$src)),
12035            (EXTRACT_SUBREG
12036             (VPTERNLOGQZrri
12037              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12038              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12039              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12040              (i8 15)), sub_ymm)>;
12041  def : Pat<(v4i64 (vnot VR256X:$src)),
12042            (EXTRACT_SUBREG
12043             (VPTERNLOGQZrri
12044              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12045              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12046              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12047              (i8 15)), sub_ymm)>;
12048}
12049
// 128-/256-bit vnot when VLX is available: select the native-width
// VPTERNLOGQZ128rri / VPTERNLOGQZ256rri directly, again with $src supplied
// for all three register operands and immediate 15.
12050let Predicates = [HasVLX] in {
12051  def : Pat<(v16i8 (vnot VR128X:$src)),
12052            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12053  def : Pat<(v8i16 (vnot VR128X:$src)),
12054            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12055  def : Pat<(v4i32 (vnot VR128X:$src)),
12056            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12057  def : Pat<(v2i64 (vnot VR128X:$src)),
12058            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12059
12060  def : Pat<(v32i8 (vnot VR256X:$src)),
12061            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12062  def : Pat<(v16i16 (vnot VR256X:$src)),
12063            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12064  def : Pat<(v8i32 (vnot VR256X:$src)),
12065            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12066  def : Pat<(v4i64 (vnot VR256X:$src)),
12067            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12068}
12069
12070//===----------------------------------------------------------------------===//
12071// AVX-512 - FixupImm
12072//===----------------------------------------------------------------------===//
12073
// Packed VFIXUPIMM forms for one vector type.
//   opc, OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended to
//                    the mnemonic.
//   sched          - scheduling info; memory forms use sched.Folded and
//                    sched.ReadAfterFold.
//   _              - type info for $dst/$src1/$src2 and for the register
//                    class / memory operand of $src3.
//   TblVT          - type info supplying the value type and load fragments
//                    used for the third source operand ($src3).
// All forms tie $src1 to $dst, use MXCSR and are marked
// mayRaiseFPException.
12074multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12075                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12076                                  X86VectorVTInfo TblVT>{
12077  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12078      Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Register form.
12079    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12080                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12081                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12082                        (X86VFixupimm (_.VT _.RC:$src1),
12083                                      (_.VT _.RC:$src2),
12084                                      (TblVT.VT _.RC:$src3),
12085                                      (i32 timm:$src4))>, Sched<[sched]>;
    // Full-vector load form: $src3 is loaded through TblVT.LdFrag.
12086    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12087                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12088                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12089                      (X86VFixupimm (_.VT _.RC:$src1),
12090                                    (_.VT _.RC:$src2),
12091                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12092                                    (i32 timm:$src4))>,
12093                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast form: $src3 is matched through TblVT.BroadcastLdFrag and the
    // encoding adds EVEX_B.
12094    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12095                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12096                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12097                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
12098                      (X86VFixupimm (_.VT _.RC:$src1),
12099                                    (_.VT _.RC:$src2),
12100                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12101                                    (i32 timm:$src4))>,
12102                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12103  } // Constraints = "$src1 = $dst"
12104}
12105
// Extends avx512_fixupimm_packed with an additional register form, rrib,
// that matches X86VFixupimmSAE and prints "{sae}" in its assembly string.
// The rrib form is encoded with EVEX_B and uses MXCSR; unlike the inherited
// forms, its enclosing `let` does not set mayRaiseFPException.
12106multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12107                                      X86FoldableSchedWrite sched,
12108                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
12109  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12110let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12111  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12112                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12113                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12114                      "$src2, $src3, {sae}, $src4",
12115                      (X86VFixupimmSAE (_.VT _.RC:$src1),
12116                                       (_.VT _.RC:$src2),
12117                                       (TblVT.VT _.RC:$src3),
12118                                       (i32 timm:$src4))>,
12119                      EVEX_B, Sched<[sched]>;
12120  }
12121}
12122
// Scalar VFIXUPIMM forms, available under HasAVX512.
//   opc, OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended.
//   sched          - scheduling info.
//   _              - type info for $dst/$src1/$src2 and operand classes.
//   _src3VT        - type info supplying the value type, register class and
//                    scalar load fragment used for the third source.
// Three forms are defined: rri (register), rrib (register with {sae},
// EVEX_B) and rmi (scalar load wrapped in scalar_to_vector).
12123multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12124                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12125                                  X86VectorVTInfo _src3VT> {
12126  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12127      ExeDomain = _.ExeDomain in {
12128    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12129                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12130                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12131                      (X86VFixupimms (_.VT _.RC:$src1),
12132                                     (_.VT _.RC:$src2),
12133                                     (_src3VT.VT _src3VT.RC:$src3),
12134                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // NOTE(review): rrib is a register form (MRMSrcReg) yet is scheduled with
    // sched.Folded/sched.ReadAfterFold, whereas rri above uses Sched<[sched]>
    // -- confirm this is intended.
12135    let Uses = [MXCSR] in
12136    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12137                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12138                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12139                      "$src2, $src3, {sae}, $src4",
12140                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
12141                                        (_.VT _.RC:$src2),
12142                                        (_src3VT.VT _src3VT.RC:$src3),
12143                                        (i32 timm:$src4))>,
12144                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12145    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12146                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12147                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12148                     (X86VFixupimms (_.VT _.RC:$src1),
12149                                    (_.VT _.RC:$src2),
12150                                    (_src3VT.VT (scalar_to_vector
12151                                              (_src3VT.ScalarLdFrag addr:$src3))),
12152                                    (i32 timm:$src4))>,
12153                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12154  }
12155}
12156
// Instantiates the packed VFIXUPIMM forms (opcode 0x54) for all three vector
// widths. The 512-bit variant uses avx512_fixupimm_packed_sae (and so also
// gets the {sae} register form) under HasAVX512; the 128-/256-bit variants
// use avx512_fixupimm_packed under HasAVX512 + HasVLX.
//   _Vec - type info for the data operands.
//   _Tbl - type info for the third (table) operand.
12157multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12158                                      AVX512VLVectorVTInfo _Vec,
12159                                      AVX512VLVectorVTInfo _Tbl> {
12160  let Predicates = [HasAVX512] in
12161    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12162                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12163                                EVEX_4V, EVEX_V512;
12164  let Predicates = [HasAVX512, HasVLX] in {
12165    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12166                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12167                            EVEX_4V, EVEX_V128;
12168    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12169                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12170                            EVEX_4V, EVEX_V256;
12171  }
12172}
12173
// Scalar single/double VFIXUPIMM (opcode 0x55): the data operands use the
// f32x/f64x scalar type info and the third operand uses the matching integer
// vector type info (v4i32x / v2i64x). The double-precision variant adds
// VEX_W.
12174defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12175                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12176                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12177defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12178                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12179                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Packed single/double VFIXUPIMM: float vector type info for the data
// operands paired with the same-width integer vector type info for the
// third operand.
12180defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12181                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12182defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12183                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
12184
12185// Patterns used to select SSE scalar fp arithmetic instructions from
12186// either:
12187//
12188// (1) a scalar fp operation followed by a blend
12189//
12190// The effect is that the backend no longer emits unnecessary vector
12191// insert instructions immediately after SSE scalar fp instructions
12192// like addss or mulss.
12193//
12194// For example, given the following code:
12195//   __m128 foo(__m128 A, __m128 B) {
12196//     A[0] += B[0];
12197//     return A;
12198//   }
12199//
12200// Previously we generated:
12201//   addss %xmm0, %xmm1
12202//   movss %xmm1, %xmm0
12203//
12204// We now generate:
12205//   addss %xmm1, %xmm0
12206//
12207// (2) a vector packed single/double fp operation followed by a vector insert
12208//
12209// The effect is that the backend converts the packed fp instruction
12210// followed by a vector insert into a single SSE scalar fp instruction.
12211//
12212// For example, given the following code:
12213//   __m128 foo(__m128 A, __m128 B) {
12214//     __m128 C = A + B;
12215//     return (__m128) {C[0], A[1], A[2], A[3]};
12216//   }
12217//
12218// Previously we generated:
12219//   addps %xmm0, %xmm1
12220//   movss %xmm1, %xmm0
12221//
12222// We now generate:
12223//   addss %xmm1, %xmm0
12224
12225// TODO: Some canonicalization in lowering would simplify the number of
12226// patterns we have to try to match.
// Selection patterns that fold "extract element 0, apply a scalar binary fp
// op, re-insert with MoveNode" into a single *_Int scalar instruction.
//   Op        - operator matched by the unmasked patterns.
//   MaskedOp  - node matched inside X86selects_mask by the masked patterns.
//   OpcPrefix - instruction name stem; records are looked up as
//               "V" # OpcPrefix # "Zrr_Int", "Zrm_Int", and their
//               "k" / "kz" variants.
//   MoveNode  - the node that inserts the scalar result back into the vector.
//   _         - vector type info (VT, element VT, scalar register class,
//               scalar load fragment).
//   ZeroFP    - zero constant matched as the false value of X86selects_mask
//               by the zero-masking (kz) patterns.
12227multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12228                                          string OpcPrefix, SDNode MoveNode,
12229                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
12230  let Predicates = [HasAVX512] in {
12231    // extracted scalar math op with insert via movss
12232    def : Pat<(MoveNode
12233               (_.VT VR128X:$dst),
12234               (_.VT (scalar_to_vector
12235                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12236                          _.FRC:$src)))),
12237              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12238               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12239    def : Pat<(MoveNode
12240               (_.VT VR128X:$dst),
12241               (_.VT (scalar_to_vector
12242                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12243                          (_.ScalarLdFrag addr:$src))))),
12244              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12245
12246    // extracted masked scalar math op with insert via movss
    // ($src0 is the value selected when the mask bit is clear; it becomes the
    // first operand of the "k" instruction forms.)
12247    def : Pat<(MoveNode (_.VT VR128X:$src1),
12248               (scalar_to_vector
12249                (X86selects_mask VK1WM:$mask,
12250                            (MaskedOp (_.EltVT
12251                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12252                                      _.FRC:$src2),
12253                            _.FRC:$src0))),
12254              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12255               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12256               VK1WM:$mask, _.VT:$src1,
12257               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12258    def : Pat<(MoveNode (_.VT VR128X:$src1),
12259               (scalar_to_vector
12260                (X86selects_mask VK1WM:$mask,
12261                            (MaskedOp (_.EltVT
12262                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12263                                      (_.ScalarLdFrag addr:$src2)),
12264                            _.FRC:$src0))),
12265              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12266               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12267               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12268
12269    // extracted zero-masked scalar math op with insert via movss
12270    def : Pat<(MoveNode (_.VT VR128X:$src1),
12271               (scalar_to_vector
12272                (X86selects_mask VK1WM:$mask,
12273                            (MaskedOp (_.EltVT
12274                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12275                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12276      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12277          VK1WM:$mask, _.VT:$src1,
12278          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12279    def : Pat<(MoveNode (_.VT VR128X:$src1),
12280               (scalar_to_vector
12281                (X86selects_mask VK1WM:$mask,
12282                            (MaskedOp (_.EltVT
12283                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12284                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12285      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12286  }
12287}
12288
// Instantiate the scalar math patterns for add/sub/mul/div. The unmasked
// patterns match the any_f* operator while the masked patterns match the
// plain f* node.
// Single precision (v4f32, X86Movss).
12289defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12290defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12291defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12292defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12293
// Double precision (v2f64, X86Movsd).
12294defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12295defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12296defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12297defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12298
// Half precision (v8f16, X86Movsh).
12299defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12300defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12301defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12302defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12303
// Selection pattern that folds "extract element 0 of $src, apply a unary
// scalar op, insert into $dst with Move" into a single
// "V" # OpcPrefix # "Zr_Int" instruction taking $dst and $src.
//   OpNode    - the unary operator applied to the extracted element.
//   OpcPrefix - instruction name stem.
//   Move      - the node that inserts the scalar result into $dst.
//   _         - vector type info providing the VT.
12304multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12305                                             SDNode Move, X86VectorVTInfo _> {
12306  let Predicates = [HasAVX512] in {
12307    def : Pat<(_.VT (Move _.VT:$dst,
12308                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12309              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12310  }
12311}
12312
// Scalar square-root patterns for single, double and half precision.
12313defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12314defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12315defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12316
12317//===----------------------------------------------------------------------===//
12318// AES instructions
12319//===----------------------------------------------------------------------===//
12320
// EVEX-encoded AES round instructions for 128-, 256- and 512-bit vectors,
// built on AESI_binop_rm_int.
//   Op        - opcode byte.
//   OpStr     - mnemonic.
//   IntPrefix - name of the 128-bit intrinsic; the 256- and 512-bit
//               intrinsics are looked up as IntPrefix # "_256" and
//               IntPrefix # "_512".
// The 128-/256-bit forms require HasVLX + HasVAES; the 512-bit form requires
// HasAVX512 + HasVAES.
12321multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12322  let Predicates = [HasVLX, HasVAES] in {
12323    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12324                                  !cast<Intrinsic>(IntPrefix),
12325                                  loadv2i64, 0, VR128X, i128mem>,
12326                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12327    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12328                                  !cast<Intrinsic>(IntPrefix#"_256"),
12329                                  loadv4i64, 0, VR256X, i256mem>,
12330                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12331    }
12332    let Predicates = [HasAVX512, HasVAES] in
12333    defm Z    : AESI_binop_rm_int<Op, OpStr,
12334                                  !cast<Intrinsic>(IntPrefix#"_512"),
12335                                  loadv8i64, 0, VR512, i512mem>,
12336                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
12337}
12338
// The four AES round instructions (opcodes 0xDC-0xDF), each bound to its
// int_x86_aesni_* intrinsic family.
12339defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12340defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12341defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12342defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12343
12344//===----------------------------------------------------------------------===//
12345// PCLMUL instructions - Carry less multiplication
12346//===----------------------------------------------------------------------===//
12347
// EVEX-encoded VPCLMULQDQ. The 512-bit form requires HasAVX512 +
// HasVPCLMULQDQ; the 128-/256-bit forms require HasVLX + HasVPCLMULQDQ.
12348let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12349defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12350                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12351
12352let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12353defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12354                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12355
12356defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12357                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12358                                EVEX_CD8<64, CD8VF>, VEX_WIG;
12359}
12360
12361// Aliases
// (vpclmulqdq_aliases is instantiated once per vector width, keyed by the
// instruction name prefix defined above.)
12362defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12363defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12364defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12365
12366//===----------------------------------------------------------------------===//
12367// VBMI2
12368//===----------------------------------------------------------------------===//
12369
// Register (r) and full-vector memory (m) forms of a VBMI2 three-source
// variable shift for one vector type. $src1 is tied to $dst.
//   Op, OpStr - opcode byte and mnemonic.
//   OpNode    - DAG node matched as (OpNode $src1, $src2, $src3).
//   sched     - scheduling info; the memory form uses sched.Folded and
//               sched.ReadAfterFold.
//   VTI       - vector type info (register class, VT, memory operand, load
//               fragment, execution domain).
12370multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12371                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12372  let Constraints = "$src1 = $dst",
12373      ExeDomain   = VTI.ExeDomain in {
12374    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12375                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12376                "$src3, $src2", "$src2, $src3",
12377                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12378                T8PD, EVEX_4V, Sched<[sched]>;
12379    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12380                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12381                "$src3, $src2", "$src2, $src3",
12382                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12383                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12384                T8PD, EVEX_4V,
12385                Sched<[sched.Folded, sched.ReadAfterFold]>;
12386  }
12387}
12388
// Extends VBMI2_shift_var_rm with an embedded-broadcast memory form (mb).
// $src3 is a scalar memory operand read through VTI.BroadcastLdFrag, the
// assembly strings append VTI.BroadcastStr to it, and EVEX_B is set.
12389multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12390                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12391         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12392  let Constraints = "$src1 = $dst",
12393      ExeDomain   = VTI.ExeDomain in
12394  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12395              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12396              "${src3}"#VTI.BroadcastStr#", $src2",
12397              "$src2, ${src3}"#VTI.BroadcastStr,
12398              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12399               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12400              T8PD, EVEX_4V, EVEX_B,
12401              Sched<[sched.Folded, sched.ReadAfterFold]>;
12402}
12403
// Instantiate VBMI2_shift_var_rm (r and m forms only, no broadcast) at 512,
// 256 and 128 bits. The 512-bit variant requires HasVBMI2; the 256-bit and
// 128-bit variants additionally require HasVLX.
12404multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12405                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12406  let Predicates = [HasVBMI2] in
12407  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12408                                   EVEX_V512;
12409  let Predicates = [HasVBMI2, HasVLX] in {
12410    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12411                                   EVEX_V256;
12412    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12413                                   EVEX_V128;
12414  }
12415}
12416
// Instantiate VBMI2_shift_var_rmb (r, m and mb forms) at 512, 256 and 128
// bits. The 512-bit variant requires HasVBMI2; the 256-bit and 128-bit
// variants additionally require HasVLX.
12417multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12418                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12419  let Predicates = [HasVBMI2] in
12420  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12421                                    EVEX_V512;
12422  let Predicates = [HasVBMI2, HasVLX] in {
12423    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12424                                    EVEX_V256;
12425    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12426                                    EVEX_V128;
12427  }
12428}
// Word (W), dword (D) and qword (Q) flavours of a variable VBMI2 shift; the
// mnemonic is Prefix plus "w", "d" or "q". W uses wOp and has no broadcast
// form. D and Q share dqOp, include the broadcast form, and differ in VEX_W
// and in the EVEX_CD8 element size.
12429multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12430                           SDNode OpNode, X86SchedWriteWidths sched> {
12431  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12432             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12433  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12434             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12435  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12436             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12437}
12438
// imm8-based VBMI2 shifts in word, dword and qword flavours, all predicated on
// HasVBMI2. W is built from avx512_common_3Op_rm_imm8 with wOp; D and Q are
// built from avx512_common_3Op_imm8 with dqOp and add AVX512AIi8Base and
// EVEX_4V here. Note the two helper multiclasses take their arguments in
// different orders.
12439multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12440                           SDNode OpNode, X86SchedWriteWidths sched> {
12441  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12442             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12443             VEX_W, EVEX_CD8<16, CD8VF>;
12444  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12445             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12446  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12447             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
12448}
12449
12450// Concat & Shift
// VPSHLDV/VPSHRDV use the variable-count multiclass; VPSHLD/VPSHRD use the
// imm8 multiclass. In each case the first opcode is for the word form and the
// second is shared by the dword and qword forms.
12451defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12452defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12453defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12454defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12455
12456// Compress
// Byte and word compress, predicated on HasVBMI2. Both use opcode 0x63 and are
// marked NotMemoryFoldable; the word form additionally sets VEX_W.
12457defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12458                                         avx512vl_i8_info, HasVBMI2>, EVEX,
12459                                         NotMemoryFoldable;
12460defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12461                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12462                                          NotMemoryFoldable;
12463// Expand
// Byte and word expand, predicated on HasVBMI2. Both use opcode 0x62; the word
// form additionally sets VEX_W.
12464defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12465                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12466defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12467                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12468
12469//===----------------------------------------------------------------------===//
12470// VNNI
12471//===----------------------------------------------------------------------===//
12472
// Register (r), full-vector memory (m) and embedded-broadcast memory (mb)
// forms of a VNNI instruction, with $src1 tied to $dst. IsCommutable is
// forwarded (twice) only to the register form; the memory forms leave those
// AVX512_maskable_3src arguments at their defaults.
12473let Constraints = "$src1 = $dst" in
12474multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12475                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12476                    bit IsCommutable> {
12477  let ExeDomain = VTI.ExeDomain in {
12478  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12479                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12480                                   "$src3, $src2", "$src2, $src3",
12481                                   (VTI.VT (OpNode VTI.RC:$src1,
12482                                            VTI.RC:$src2, VTI.RC:$src3)),
12483                                   IsCommutable, IsCommutable>,
12484                                   EVEX_4V, T8PD, Sched<[sched]>;
12485  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12486                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12487                                   "$src3, $src2", "$src2, $src3",
12488                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12489                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12490                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12491                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12492  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12493                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12494                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12495                                   "$src2, ${src3}"#VTI.BroadcastStr,
12496                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12497                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12498                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12499                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12500  }
12501}
12502
// Instantiate VNNI_rmb on i32-element vector infos at 512, 256 and 128 bits.
// The 512-bit variant requires HasVNNI; the 256-bit and 128-bit variants
// additionally require HasVLX.
12503multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12504                       X86SchedWriteWidths sched, bit IsCommutable> {
12505  let Predicates = [HasVNNI] in
12506  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12507                           IsCommutable>, EVEX_V512;
12508  let Predicates = [HasVNNI, HasVLX] in {
12509    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12510                           IsCommutable>, EVEX_V256;
12511    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12512                           IsCommutable>, EVEX_V128;
12513  }
12514}
12515
12516// FIXME: Is there a better scheduler class for VPDP?
// The last argument is IsCommutable: 0 for the VPDPBUSD/VPDPBUSDS forms and 1
// for the VPDPWSSD/VPDPWSSDS forms.
12517defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12518defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12519defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12520defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12521
12522// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Select `add $src1, (X86vpmaddwd_su $src2, $src3)` to a single VPDPWSSD, with
// a register form and a folded-load form at each vector width. The 512-bit
// patterns require HasVNNI; the 256-bit and 128-bit ones also require HasVLX.
// NOTE(review): the `_su` suffix presumably restricts the match to a
// single-use vpmaddwd node -- confirm against the PatFrag definition.
12523let Predicates = [HasVNNI] in {
12524  def : Pat<(v16i32 (add VR512:$src1,
12525                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12526            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12527  def : Pat<(v16i32 (add VR512:$src1,
12528                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12529            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12530}
12531let Predicates = [HasVNNI,HasVLX] in {
12532  def : Pat<(v8i32 (add VR256X:$src1,
12533                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12534            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12535  def : Pat<(v8i32 (add VR256X:$src1,
12536                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12537            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12538  def : Pat<(v4i32 (add VR128X:$src1,
12539                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12540            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12541  def : Pat<(v4i32 (add VR128X:$src1,
12542                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12543            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12544}
12545
12546//===----------------------------------------------------------------------===//
12547// Bit Algorithms
12548//===----------------------------------------------------------------------===//
12549
12550// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word vector instructions for the ctpop node, predicated on
// HasBITALG. Both use opcode 0x54; the word form additionally sets VEX_W. The
// avx512_unary_lowering instantiations refer to these instructions by name
// string.
12551defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12552                                   avx512vl_i8_info, HasBITALG>;
12553defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12554                                   avx512vl_i16_info, HasBITALG>, VEX_W;
12555
12556defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12557defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12558
// Variant of X86Vpshufbitqmb that matches only when the node has exactly one
// use (the predicate is N->hasOneUse()).
12559def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12560                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12561  return N->hasOneUse();
12562}]>;
12563
// Register-register (rr) and register-memory (rm) forms of vpshufbitqmb
// (opcode 0x8F). The result goes to a mask register (VTI.KRC), not a vector
// register. Each form hands AVX512_maskable_cmp two patterns: one on the plain
// node and one on its single-use (_su) variant.
12564multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12565  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12566                                (ins VTI.RC:$src1, VTI.RC:$src2),
12567                                "vpshufbitqmb",
12568                                "$src2, $src1", "$src1, $src2",
12569                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12570                                (VTI.VT VTI.RC:$src2)),
12571                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12572                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12573                                Sched<[sched]>;
12574  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12575                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12576                                "vpshufbitqmb",
12577                                "$src2, $src1", "$src1, $src2",
12578                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12579                                (VTI.VT (VTI.LdFrag addr:$src2))),
12580                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12581                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12582                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12583                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12584}
12585
// Instantiate VPSHUFBITQMB_rm at 512, 256 and 128 bits. The 512-bit variant
// requires HasBITALG; the narrower variants additionally require HasVLX.
12586multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12587  let Predicates = [HasBITALG] in
12588  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12589  let Predicates = [HasBITALG, HasVLX] in {
12590    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12591    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12592  }
12593}
12594
12595// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Instantiate all widths over the i8-element vector infos.
12596defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12597
12598//===----------------------------------------------------------------------===//
12599// GFNI
12600//===----------------------------------------------------------------------===//
12601
// EVEX forms of a byte-vector binary operation at 512, 256 and 128 bits. The
// 512-bit variant requires HasGFNI, HasAVX512 and HasBWI; the narrower
// variants require HasGFNI, HasVLX and HasBWI.
// NOTE(review): the trailing `1` passed to avx512_binop_rm is presumably its
// IsCommutable flag -- confirm against avx512_binop_rm.
12602multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12603                                   X86SchedWriteWidths sched> {
12604  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12605  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12606                                EVEX_V512;
12607  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12608    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12609                                EVEX_V256;
12610    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12611                                EVEX_V128;
12612  }
12613}
12614
// vgf2p8mulb at all EVEX widths: opcode 0xCF with the T8PD prefix/map class
// and 8-bit-element full-vector disp8 scaling.
12615defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12616                                          SchedWriteVecALU>,
12617                                          EVEX_CD8<8, CD8VF>, T8PD;
12618
// Extends avx512_3Op_rm_imm8 with an embedded-broadcast form (rmbi). The
// operation itself is typed with VTI, but the broadcast operand is a 64-bit
// broadcast load (X86VBroadcastld64) typed as BcstVTI.VT and then bitconverted.
// The broadcast suffix in the assembly strings also comes from BcstVTI.
12619multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12620                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12621                                      X86VectorVTInfo BcstVTI>
12622           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12623  let ExeDomain = VTI.ExeDomain in
12624  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12625                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12626                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12627                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12628                (OpNode (VTI.VT VTI.RC:$src1),
12629                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12630                 (i8 timm:$src3))>, EVEX_B,
12631                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12632}
12633
// Instantiate GF2P8AFFINE_avx512_rmb_imm at 512, 256 and 128 bits, pairing the
// i8 vector info of each width with the i64 vector info of the same width for
// the broadcast form. The predicates match those of GF2P8MULB_avx512_common.
12634multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12635                                     X86SchedWriteWidths sched> {
12636  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12637  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12638                                           v64i8_info, v8i64_info>, EVEX_V512;
12639  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12640    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12641                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12642    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12643                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12644  }
12645}
12646
// The two affine instructions differ only in opcode (0xCF vs 0xCE), mnemonic
// and selection node; both set VEX_W and use AVX512AIi8Base.
12647defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12648                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12649                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12650defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12651                         X86GF2P8affineqb, SchedWriteVecIMul>,
12652                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12653
12654
12655//===----------------------------------------------------------------------===//
12656// AVX5124FMAPS
12657//===----------------------------------------------------------------------===//
12658
// Memory-operand-only definitions with empty pattern lists, so nothing here is
// selected from the DAG. All four are marked mayLoad, list MXCSR in Uses, may
// raise FP exceptions and tie $src1 to $dst. The packed (PS) forms are 512-bit
// with CD8VQ disp8 scaling; the scalar (SS) forms use VR128X, VEX_LIG and
// CD8VF. Every form takes an f128mem operand.
12659let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12660    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12661defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12662                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12663                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12664                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12665                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12666
12667defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12668                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12669                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12670                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12671                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12672
12673defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12674                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12675                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12676                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12677                    Sched<[SchedWriteFMA.Scl.Folded]>;
12678
12679defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12680                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12681                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12682                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12683                     Sched<[SchedWriteFMA.Scl.Folded]>;
12684}
12685
12686//===----------------------------------------------------------------------===//
12687// AVX5124VNNIW
12688//===----------------------------------------------------------------------===//
12689
// Memory-operand-only 512-bit definitions with empty pattern lists, marked
// mayLoad with $src1 tied to $dst. Unlike the AVX5124FMAPS group, this group
// does not list MXCSR in Uses and does not set mayRaiseFPException. Both reuse
// the SchedWriteFMA folded scheduling class.
12690let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12691    Constraints = "$src1 = $dst" in {
12692defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12693                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12694                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12695                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12696                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12697
12698defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12699                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12700                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12701                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12702                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12703}
12704
// Pseudo instructions that store and load a VK16PAIR mask-register pair
// to/from memory. Neither carries a selection pattern; each is marked
// mayStore/mayLoad and given a store/load scheduling class explicitly.
12705let hasSideEffects = 0 in {
12706  let mayStore = 1, SchedRW = [WriteFStoreX] in
12707  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12708  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12709  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12710}
12711
12712//===----------------------------------------------------------------------===//
12713// VP2INTERSECT
12714//===----------------------------------------------------------------------===//
12715
// Register (rr), full-vector memory (rm) and embedded-broadcast memory (rmb)
// forms of vp2intersect (opcode 0x68). The destination is the mask register
// pair operand _.KRPC rather than a vector register. The mnemonic is
// "vp2intersect" followed by _.Suffix, and the memory forms scale disp8 by
// _.EltSize.
12716multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12717  def rr : I<0x68, MRMSrcReg,
12718                  (outs _.KRPC:$dst),
12719                  (ins _.RC:$src1, _.RC:$src2),
12720                  !strconcat("vp2intersect", _.Suffix,
12721                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12722                  [(set _.KRPC:$dst, (X86vp2intersect
12723                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12724                  EVEX_4V, T8XD, Sched<[sched]>;
12725
12726  def rm : I<0x68, MRMSrcMem,
12727                  (outs _.KRPC:$dst),
12728                  (ins  _.RC:$src1, _.MemOp:$src2),
12729                  !strconcat("vp2intersect", _.Suffix,
12730                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12731                  [(set _.KRPC:$dst, (X86vp2intersect
12732                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12733                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12734                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12735
12736  def rmb : I<0x68, MRMSrcMem,
12737                  (outs _.KRPC:$dst),
12738                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12739                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12740                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12741                  [(set _.KRPC:$dst, (X86vp2intersect
12742                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12743                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12744                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12745}
12746
// Instantiate avx512_vp2intersect_modes at 512, 256 and 128 bits. All widths
// require HasAVX512 and HasVP2INTERSECT; the 256-bit and 128-bit variants
// additionally require HasVLX.
12747multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12748  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12749    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12750
12751  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12752    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12753    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12754  }
12755}
12756
// Dword and qword flavours; the qword form additionally sets VEX_W.
12757defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12758defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12759
// Instantiate avx512_binop_rm2 at 512, 256 and 128 bits for a binary operation
// whose source and destination vector infos differ. The source info is also
// passed as the third info argument; its role is defined by avx512_binop_rm2.
// Every width uses EVEX_CD8<32, CD8VF>. The 512-bit variant requires `prd`;
// the narrower variants additionally require HasVLX.
12760multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12761                             X86SchedWriteWidths sched,
12762                             AVX512VLVectorVTInfo _SrcVTInfo,
12763                             AVX512VLVectorVTInfo _DstVTInfo,
12764                             SDNode OpNode, Predicate prd,
12765                             bit IsCommutable = 0> {
12766  let Predicates = [prd] in
12767    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12768                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12769                                   _SrcVTInfo.info512, IsCommutable>,
12770                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12771  let Predicates = [HasVLX, prd] in {
12772    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12773                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12774                                      _SrcVTInfo.info256, IsCommutable>,
12775                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12776    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12777                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12778                                      _SrcVTInfo.info128, IsCommutable>,
12779                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12780  }
12781}
12782
// Two-source f32 -> bf16 conversion: sources use the f32 vector infos and the
// destination uses the i16 vector infos. Predicated on HasBF16 and declared
// non-commutable (IsCommutable = 0).
12783let ExeDomain = SSEPackedSingle in
12784defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12785                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12786                                        avx512vl_f32_info, avx512vl_i16_info,
12787                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12788
12789// Truncate Float to BFloat16
// Z, Z256 and Z128 forms of a single-source f32 -> bf16 conversion built on
// avx512_vcvt_fp. Every form clears Uses and sets mayRaiseFPException = 0.
// The Z128 form passes null_frag for both nodes, so it gets no selection
// pattern from this multiclass. The InstAliases add "x"-suffixed (Z128) and
// "y"-suffixed (Z256) mnemonics; the memory-operand aliases are limited to the
// "intel" assembler variant.
12790multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12791                             X86SchedWriteWidths sched> {
12792  let ExeDomain = SSEPackedSingle in {
12793  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12794    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12795                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12796  }
12797  let Predicates = [HasBF16, HasVLX] in {
12798    let Uses = []<Register>, mayRaiseFPException = 0 in {
12799    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12800                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12801                               VK4WM>, EVEX_V128;
12802    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12803                               X86cvtneps2bf16, X86cvtneps2bf16,
12804                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12805    }
12806  } // Predicates = [HasBF16, HasVLX]
12807  } // ExeDomain = SSEPackedSingle
12808
12809  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12810                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12811                  VR128X:$src), 0>;
12812  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12813                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12814                  f128mem:$src), 0, "intel">;
12815  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12816                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12817                  VR256X:$src), 0>;
12818  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12819                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12820                  f256mem:$src), 0, "intel">;
12821}
12822
// Shares opcode 0x72 with VCVTNE2PS2BF16 but uses T8XS where that one uses
// T8XD.
12823defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12824                                       SchedWriteCvtPD2PS>, T8XS,
12825                                       EVEX_CD8<32, CD8VF>;
12826
// Selection patterns for the 128-bit VCVTNEPS2BF16 forms. There is one group
// per source kind: register, full-vector load (loadv4f32) and 32-bit broadcast
// load. Each group holds an unmasked pattern on X86cvtneps2bf16 and two masked
// patterns on X86mcvtneps2bf16. The "k" pattern passes $src0 as the first
// instruction operand; the "kz" pattern matches an all-zeros vector in that
// position and omits it.
12827let Predicates = [HasBF16, HasVLX] in {
12828  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12829  // patterns have been disabled with null_frag.
12830  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12831            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12832  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12833                              VK4WM:$mask),
12834            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12835  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12836                              VK4WM:$mask),
12837            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12838
12839  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12840            (VCVTNEPS2BF16Z128rm addr:$src)>;
12841  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12842                              VK4WM:$mask),
12843            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12844  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12845                              VK4WM:$mask),
12846            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12847
12848  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12849                                     (X86VBroadcastld32 addr:$src)))),
12850            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12851  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12852                              (v8i16 VR128X:$src0), VK4WM:$mask),
12853            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12854  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12855                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12856            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12857}
12858
// Register (r), full-vector memory (m) and embedded-broadcast memory (mb)
// forms of a three-source instruction whose accumulator/result uses `_` and
// whose other two sources use `src_v`. $src1 is tied to $dst. In the broadcast
// form the assembly suffix comes from `_.BroadcastStr`, while the load itself
// uses src_v.BroadcastLdFrag.
12859let Constraints = "$src1 = $dst" in {
12860multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12861                              X86FoldableSchedWrite sched,
12862                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12863  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12864                           (ins src_v.RC:$src2, src_v.RC:$src3),
12865                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12866                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12867                           EVEX_4V, Sched<[sched]>;
12868
12869  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12870                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12871                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12872                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12873                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12874                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12875
12876  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12877                  (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12878                  OpcodeStr,
12879                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12880                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12881                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12882                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12883                  EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12884
12885}
12886} // Constraints = "$src1 = $dst"
12887
// Instantiate avx512_dpbf16ps_rm at 512, 256 and 128 bits, selecting the
// matching-width member of both `_` and `src_v`. The 512-bit variant requires
// `prd`; the narrower variants additionally require HasVLX.
12888multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12889                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12890                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12891  let Predicates = [prd] in {
12892    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12893                                   src_v.info512>, EVEX_V512;
12894  }
12895  let Predicates = [HasVLX, prd] in {
12896    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12897                                   src_v.info256>, EVEX_V256;
12898    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12899                                   src_v.info128>, EVEX_V128;
12900  }
12901}
12902
// vdpbf16ps at all widths: the accumulator/result uses the f32 vector infos
// and the two sources use the i32 vector infos. Predicated on HasBF16.
12903let ExeDomain = SSEPackedSingle in
12904defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12905                                       avx512vl_f32_info, avx512vl_i32_info,
12906                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12907
12908//===----------------------------------------------------------------------===//
12909// AVX512FP16
12910//===----------------------------------------------------------------------===//
12911
12912let Predicates = [HasFP16] in {
     // "vmovw" instruction definitions and the selection patterns that use them.
     // Every record in this block is gated on HasFP16.
12913// Move word (r/m16) to Packed word
     // The register form (GR32 source) has an empty pattern list and is only
     // reached through the standalone Pat records below; the memory form matches
     // a v8i16 scalar_to_vector of a loadi16 directly.
12914def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12915                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12916def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12917                      "vmovw\t{$src, $dst|$dst, $src}",
12918                      [(set VR128X:$dst,
12919                        (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12920                      T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12921
     // GR16 sources are placed into a wider register with INSERT_SUBREG
     // (sub_16bit of an IMPLICIT_DEF) because VMOVW2SHrr takes a GR32 operand.
12922def : Pat<(f16 (bitconvert GR16:$src)),
12923          (f16 (COPY_TO_REGCLASS
12924                (VMOVW2SHrr
12925                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12926                FR16X))>;
12927def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12928          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
     // A GR32 masked with 0xffff under X86vzmovl maps straight onto VMOVW2SHrr.
12929def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12930          (VMOVW2SHrr GR32:$src)>;
12931// FIXME: We should really find a way to improve these patterns.
     // 256-bit and 512-bit variants of the pattern above: the 128-bit result is
     // wrapped with SUBREG_TO_REG at sub_xmm.
12932def : Pat<(v8i32 (X86vzmovl
12933                  (insert_subvector undef,
12934                                    (v4i32 (scalar_to_vector
12935                                            (and GR32:$src, 0xffff))),
12936                                    (iPTR 0)))),
12937          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12938def : Pat<(v16i32 (X86vzmovl
12939                   (insert_subvector undef,
12940                                     (v4i32 (scalar_to_vector
12941                                             (and GR32:$src, 0xffff))),
12942                                     (iPTR 0)))),
12943          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12944
12945def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12946          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12947
12948// AVX 128-bit movw instructions write zeros in the high 128-bit part.
12949def : Pat<(v8i16 (X86vzload16 addr:$src)),
12950          (VMOVWrm addr:$src)>;
12951def : Pat<(v16i16 (X86vzload16 addr:$src)),
12952          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12953
12954// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12955def : Pat<(v32i16 (X86vzload16 addr:$src)),
12956          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12957
     // 16-bit extending loads that feed a v4i32 scalar_to_vector are selected to
     // the memory form; the wider variants again use SUBREG_TO_REG at sub_xmm.
12958def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12959          (VMOVWrm addr:$src)>;
12960def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12961          (VMOVWrm addr:$src)>;
12962def : Pat<(v8i32 (X86vzmovl
12963                  (insert_subvector undef,
12964                                    (v4i32 (scalar_to_vector
12965                                            (i32 (zextloadi16 addr:$src)))),
12966                                    (iPTR 0)))),
12967          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12968def : Pat<(v16i32 (X86vzmovl
12969                   (insert_subvector undef,
12970                                     (v4i32 (scalar_to_vector
12971                                             (i32 (zextloadi16 addr:$src)))),
12972                                     (iPTR 0)))),
12973          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12974
12975// Move word from xmm register to r/m16
     // The register form (GR32 destination) has an empty pattern list; the
     // memory form matches a store of element 0 of a v8i16.
12976def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12977                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12978def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12979                       (ins i16mem:$dst, VR128X:$src),
12980                       "vmovw\t{$src, $dst|$dst, $src}",
12981                       [(store (i16 (extractelt (v8i16 VR128X:$src),
12982                                     (iPTR 0))), addr:$dst)]>,
12983                       T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12984
     // i16 results are taken from the GR32 output of VMOVSH2Wrr with
     // EXTRACT_SUBREG at sub_16bit.
12985def : Pat<(i16 (bitconvert FR16X:$src)),
12986          (i16 (EXTRACT_SUBREG
12987                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12988                sub_16bit))>;
12989def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12990          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12991}
12992
12993// Allow "vmovw" to use GR64
     // Both defs have an empty pattern list and differ from the GR32 register
     // forms by using GR64 operands and adding VEX_W.
     // NOTE(review): no Predicates list is attached to this block in the visible
     // source, unlike the HasFP16-gated GR32 forms - confirm this is intentional.
12994let hasSideEffects = 0 in {
12995  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12996                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
12997  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12998                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
12999}
13000
13001// Convert 16-bit float to i16/u16
     // Also instantiated in the opposite direction (i16/u16 to half) by swapping
     // _Dst and _Src. The 512-bit form (HasFP16) combines avx512_vcvt_fp with
     // avx512_vcvt_fp_rc, which receives OpNodeRnd; the 128-bit and 256-bit forms
     // (HasFP16 + HasVLX) use avx512_vcvt_fp only.
13002multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13003                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13004                          AVX512VLVectorVTInfo _Dst,
13005                          AVX512VLVectorVTInfo _Src,
13006                          X86SchedWriteWidths sched> {
13007  let Predicates = [HasFP16] in {
13008    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13009                            OpNode, MaskOpNode, sched.ZMM>,
13010             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
13011                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13012  }
13013  let Predicates = [HasFP16, HasVLX] in {
13014    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13015                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13016    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13017                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13018  }
13019}
13020
13021// Convert 16-bit float to i16/u16 truncate
     // Same layout as avx512_cvtph2w, except that the 512-bit form is combined
     // with avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc. The 128-bit and
     // 256-bit forms additionally require HasVLX.
13022multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13023                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13024                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
13025                           X86SchedWriteWidths sched> {
13026  let Predicates = [HasFP16] in {
13027    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13028                            OpNode, MaskOpNode, sched.ZMM>,
13029             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
13030                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13031  }
13032  let Predicates = [HasFP16, HasVLX] in {
13033    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13034                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13035    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13036                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13037  }
13038}
13039
     // Packed conversions between half vectors and 16-bit integer vectors.
     // Arguments follow the multiclass order: opcode, mnemonic, OpNode,
     // MaskOpNode, OpNodeRnd, _Dst type info, _Src type info, scheduling set.
     // All six use SchedWriteCvtPD2DQ and EVEX_CD8<16, CD8VF>; they differ in
     // opcode, nodes, direction and the T_MAP5* class.
     // VCVTUW2PH and VCVTW2PH reuse avx512_cvtph2w with avx512vl_f16_info as the
     // destination and avx512vl_i16_info as the source.
13040defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13041                                X86cvtp2UIntRnd, avx512vl_i16_info,
13042                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13043                                T_MAP5PS, EVEX_CD8<16, CD8VF>;
13044defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13045                                X86VUintToFpRnd, avx512vl_f16_info,
13046                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13047                                T_MAP5XD, EVEX_CD8<16, CD8VF>;
13048defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13049                                X86cvttp2si, X86cvttp2siSAE,
13050                                avx512vl_i16_info, avx512vl_f16_info,
13051                                SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
13052defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13053                                X86cvttp2ui, X86cvttp2uiSAE,
13054                                avx512vl_i16_info, avx512vl_f16_info,
13055                                SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
13056defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13057                                X86cvtp2IntRnd, avx512vl_i16_info,
13058                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13059                                T_MAP5PD, EVEX_CD8<16, CD8VF>;
13060defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13061                                X86VSintToFpRnd, avx512vl_f16_info,
13062                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13063                                T_MAP5XS, EVEX_CD8<16, CD8VF>;
13064
13065// Convert Half to Signed/Unsigned Doubleword
     // The 512-bit form converts v16f16x_info to v16i32_info and is combined with
     // avx512_vcvt_fp_rc (OpNodeRnd). Both VLX forms read a v8f16x_info source:
     // Z256 produces v8i32x_info, while Z128 produces v4i32x_info and passes the
     // "{1to4}" broadcast string and f64mem operand explicitly.
13066multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13067                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13068                           X86SchedWriteWidths sched> {
13069  let Predicates = [HasFP16] in {
13070    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13071                            MaskOpNode, sched.ZMM>,
13072             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13073                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13074  }
13075  let Predicates = [HasFP16, HasVLX] in {
13076    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13077                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13078    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13079                               MaskOpNode, sched.YMM>, EVEX_V256;
13080  }
13081}
13082
13083// Convert Half to Signed/Unsigned Doubleword with truncation
     // Same type infos and operands as avx512_cvtph2dq; the 512-bit form is
     // combined with avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
13084multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13085                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13086                            X86SchedWriteWidths sched> {
13087  let Predicates = [HasFP16] in {
13088    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13089                            MaskOpNode, sched.ZMM>,
13090             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13091                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13092  }
13093  let Predicates = [HasFP16, HasVLX] in {
13094    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13095                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13096    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13097                               MaskOpNode, sched.YMM>, EVEX_V256;
13098  }
13099}
13100
13101
     // Packed half to signed/unsigned doubleword conversions. All four use
     // SchedWriteCvtPS2DQ and EVEX_CD8<16, CD8VH>; the VCVTT* records go through
     // avx512_cvttph2dq and pass *SAE nodes, the others go through
     // avx512_cvtph2dq and pass *Rnd nodes.
13102defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13103                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13104                                 EVEX_CD8<16, CD8VH>;
13105defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13106                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
13107                                 EVEX_CD8<16, CD8VH>;
13108
13109defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13110                                X86cvttp2si, X86cvttp2siSAE,
13111                                SchedWriteCvtPS2DQ>, T_MAP5XS,
13112                                EVEX_CD8<16, CD8VH>;
13113
13114defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13115                                 X86cvttp2ui, X86cvttp2uiSAE,
13116                                 SchedWriteCvtPS2DQ>, T_MAP5PS,
13117                                 EVEX_CD8<16, CD8VH>;
13118
13119// Convert Half to Signed/Unsigned Quadword
     // Every form reads a v8f16x_info source. The 512-bit form produces
     // v8i64_info and is combined with avx512_vcvt_fp_rc (OpNodeRnd); the VLX
     // forms produce v2i64x_info / v4i64x_info and spell out the broadcast string
     // and memory operand explicitly.
13120multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13121                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13122                           X86SchedWriteWidths sched> {
13123  let Predicates = [HasFP16] in {
13124    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13125                            MaskOpNode, sched.ZMM>,
13126             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13127                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13128  }
13129  let Predicates = [HasFP16, HasVLX] in {
13130    // Explicitly specified broadcast string, since we take only 2 elements
13131    // from v8f16x_info source
13132    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13133                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13134                               EVEX_V128;
13135    // Explicitly specified broadcast string, since we take only 4 elements
13136    // from v8f16x_info source
13137    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13138                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13139                               EVEX_V256;
13140  }
13141}
13142
13143// Convert Half to Signed/Unsigned Quadword with truncation
     // Same type infos and operands as avx512_cvtph2qq; the 512-bit form is
     // combined with avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
13144multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13145                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13146                            X86SchedWriteWidths sched> {
13147  let Predicates = [HasFP16] in {
13148    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13149                            MaskOpNode, sched.ZMM>,
13150             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13151                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13152  }
13153  let Predicates = [HasFP16, HasVLX] in {
13154    // Explicitly specified broadcast string, since we take only 2 elements
13155    // from v8f16x_info source
13156    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13157                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13158    // Explicitly specified broadcast string, since we take only 4 elements
13159    // from v8f16x_info source
13160    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13161                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13162  }
13163}
13164
     // Packed half to signed/unsigned quadword conversions. All four use
     // SchedWriteCvtPS2DQ, T_MAP5PD and EVEX_CD8<16, CD8VQ>; they differ in opcode
     // and in the nodes passed (the VCVTT* records go through avx512_cvttph2qq
     // with *SAE nodes).
13165defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13166                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13167                                 EVEX_CD8<16, CD8VQ>;
13168
13169defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13170                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13171                                 EVEX_CD8<16, CD8VQ>;
13172
13173defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13174                                 X86cvttp2si, X86cvttp2siSAE,
13175                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
13176                                 EVEX_CD8<16, CD8VQ>;
13177
13178defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13179                                 X86cvttp2ui, X86cvttp2uiSAE,
13180                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
13181                                 EVEX_CD8<16, CD8VQ>;
13182
13183// Convert Signed/Unsigned Quadword to Half
     // OpNode and MaskOpNode are used only by the 512-bit form; the 128-bit and
     // 256-bit forms pass null_frag for both and are given explicit i128mem/VK2WM
     // and i256mem/VK4WM operands. The InstAlias records that follow the defms
     // add "x"/"y"/"z"-suffixed mnemonics for the Z128, Z256 and Z variants.
13184multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13185                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13186                           X86SchedWriteWidths sched> {
13187  // We need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13188  // 512 memory forms of these instructions in Asm Parser. They have the same
13189  // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13190  // due to the same reason.
13191  let Predicates = [HasFP16] in {
13192    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13193                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13194             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13195                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13196  }
13197  let Predicates = [HasFP16, HasVLX] in {
13198    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13199                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13200                               i128mem, VK2WM>,
13201                               EVEX_V128, NotEVEX2VEXConvertible;
13202    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13203                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13204                               i256mem, VK4WM>,
13205                               EVEX_V256, NotEVEX2VEXConvertible;
13206  }
13207
       // "x"-suffixed aliases for the Z128 forms: rr, rrk, rrkz, then the {1to2}
       // broadcast forms rmb, rmbk, rmbkz. Every alias in this multiclass passes
       // 0 and "att" as its trailing arguments (presumably: not used for
       // printing, AT&T variant only - confirm against InstAlias).
13208  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13209                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13210                  VR128X:$src), 0, "att">;
13211  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13212                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13213                  VK2WM:$mask, VR128X:$src), 0, "att">;
13214  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13215                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13216                  VK2WM:$mask, VR128X:$src), 0, "att">;
13217  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13218                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13219                  i64mem:$src), 0, "att">;
13220  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13221                  "$dst {${mask}}, ${src}{1to2}}",
13222                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13223                  VK2WM:$mask, i64mem:$src), 0, "att">;
13224  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13225                  "$dst {${mask}} {z}, ${src}{1to2}}",
13226                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13227                  VK2WM:$mask, i64mem:$src), 0, "att">;
13228
       // "y"-suffixed aliases for the Z256 forms (VR256X source, VK4WM mask,
       // {1to4} broadcast), in the same rr/rrk/rrkz/rmb/rmbk/rmbkz order.
13229  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13230                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13231                  VR256X:$src), 0, "att">;
13232  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13233                  "$dst {${mask}}, $src}",
13234                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13235                  VK4WM:$mask, VR256X:$src), 0, "att">;
13236  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13237                  "$dst {${mask}} {z}, $src}",
13238                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13239                  VK4WM:$mask, VR256X:$src), 0, "att">;
13240  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13241                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13242                  i64mem:$src), 0, "att">;
13243  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13244                  "$dst {${mask}}, ${src}{1to4}}",
13245                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13246                  VK4WM:$mask, i64mem:$src), 0, "att">;
13247  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13248                  "$dst {${mask}} {z}, ${src}{1to4}}",
13249                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13250                  VK4WM:$mask, i64mem:$src), 0, "att">;
13251
       // "z"-suffixed aliases for the 512-bit forms (VR512 source, VK8WM mask,
       // {1to8} broadcast), in the same order.
13252  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13253                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13254                  VR512:$src), 0, "att">;
13255  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13256                  "$dst {${mask}}, $src}",
13257                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13258                  VK8WM:$mask, VR512:$src), 0, "att">;
13259  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13260                  "$dst {${mask}} {z}, $src}",
13261                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13262                  VK8WM:$mask, VR512:$src), 0, "att">;
13263  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13264                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13265                  i64mem:$src), 0, "att">;
13266  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13267                  "$dst {${mask}}, ${src}{1to8}}",
13268                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13269                  VK8WM:$mask, i64mem:$src), 0, "att">;
13270  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13271                  "$dst {${mask}} {z}, ${src}{1to8}}",
13272                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13273                  VK8WM:$mask, i64mem:$src), 0, "att">;
13274}
13275
     // Signed and unsigned quadword-to-half instantiations of avx512_cvtqq2ph.
     // Both use SchedWriteCvtDQ2PS, VEX_W and EVEX_CD8<64, CD8VF>; they differ in
     // opcode, conversion nodes and the T_MAP5PS / T_MAP5XD class.
13276defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13277                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
13278                            EVEX_CD8<64, CD8VF>;
13279
13280defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13281                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
13282                            EVEX_CD8<64, CD8VF>;
13283
13284// Convert half to signed/unsigned int 32/64
     // Scalar conversions from f16x_info to i32x_info / i64x_info, all gated on
     // HasFP16 and encoded with T_MAP5XS and EVEX_CD8<16, CD8VT1>. The 64-bit
     // destination variants add VEX_W and use the "{q}" suffix; the 32-bit ones
     // use "{l}".
13285defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13286                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13287                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13288defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13289                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13290                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13291defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13292                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13293                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13294defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13295                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13296                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13297
     // Truncating variants, instantiated through avx512_cvt_s_all with
     // any_fp_to_sint / any_fp_to_uint and the X86cvtts2* nodes.
13298defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13299                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13300                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13301defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13302                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13303                        "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13304defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13305                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13306                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13307defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13308                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13309                        "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13310
13311let Predicates = [HasFP16] in {
       // Scalar signed/unsigned integer to half conversions, instantiated through
       // avx512_vcvtsi_common for GR32 (i32mem/loadi32, "l") and GR64
       // (i64mem/loadi64, "q", plus VEX_W) sources.
13312  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13313                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13314                                   T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13315  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13316                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13317                                   T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
13318  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13319                                    v8f16x_info, i32mem, loadi32,
13320                                    "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13321  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13322                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13323                                    T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
       // Unsuffixed "vcvtsi2sh"/"vcvtusi2sh" aliases that resolve to the i32mem
       // rm_Int forms.
13324  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13325              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13326
13327  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13328              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13329
13330
       // Plain f16 conversions from a loaded or register integer. The first
       // operand of the selected instruction is an f16 IMPLICIT_DEF.
13331  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13332            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13333  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13334            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13335
13336  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13337            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13338  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13339            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13340
13341  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13342            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13343  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13344            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13345
13346  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13347            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13348  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13349            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13350
13351  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13352  // which produce unnecessary vmovsh instructions
       // Each pattern folds X86Movsh of $dst with a scalar_to_vector of the
       // converted value into the matching *_Int instruction, which takes $dst
       // as its first operand.
13353  def : Pat<(v8f16 (X86Movsh
13354                     (v8f16 VR128X:$dst),
13355                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13356            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13357
13358  def : Pat<(v8f16 (X86Movsh
13359                     (v8f16 VR128X:$dst),
13360                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13361            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13362
13363  def : Pat<(v8f16 (X86Movsh
13364                     (v8f16 VR128X:$dst),
13365                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13366            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13367
13368  def : Pat<(v8f16 (X86Movsh
13369                     (v8f16 VR128X:$dst),
13370                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13371            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13372
13373  def : Pat<(v8f16 (X86Movsh
13374                     (v8f16 VR128X:$dst),
13375                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13376            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13377
13378  def : Pat<(v8f16 (X86Movsh
13379                     (v8f16 VR128X:$dst),
13380                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13381            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13382
13383  def : Pat<(v8f16 (X86Movsh
13384                     (v8f16 VR128X:$dst),
13385                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13386            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13387
13388  def : Pat<(v8f16 (X86Movsh
13389                     (v8f16 VR128X:$dst),
13390                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13391            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13392} // Predicates = [HasFP16]
13393
13394let Predicates = [HasFP16, HasVLX] in {
13395  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13396  // patterns have been disabled with null_frag.
       // Signed quadword sources: v4i64 selects the VCVTQQ2PHZ256 forms with a
       // VK4WM mask, v2i64 selects the VCVTQQ2PHZ128 forms with a VK2WM mask.
       // Each source kind (register, full load, 64-bit broadcast load) gets an
       // unmasked, a merge-masked ($src0) and a zero-masked (ImmAllZerosV)
       // pattern.
13397  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13398            (VCVTQQ2PHZ256rr VR256X:$src)>;
13399  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13400                           VK4WM:$mask),
13401            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13402  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13403                           VK4WM:$mask),
13404            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13405
13406  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13407            (VCVTQQ2PHZ256rm addr:$src)>;
13408  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13409                           VK4WM:$mask),
13410            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13411  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13412                           VK4WM:$mask),
13413            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13414
13415  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13416            (VCVTQQ2PHZ256rmb addr:$src)>;
13417  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13418                           (v8f16 VR128X:$src0), VK4WM:$mask),
13419            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13420  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13421                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13422            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13423
13424  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13425            (VCVTQQ2PHZ128rr VR128X:$src)>;
13426  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13427                           VK2WM:$mask),
13428            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13429  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13430                           VK2WM:$mask),
13431            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13432
13433  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13434            (VCVTQQ2PHZ128rm addr:$src)>;
13435  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13436                           VK2WM:$mask),
13437            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13438  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13439                           VK2WM:$mask),
13440            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13441
13442  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13443            (VCVTQQ2PHZ128rmb addr:$src)>;
13444  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13445                           (v8f16 VR128X:$src0), VK2WM:$mask),
13446            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13447  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13448                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13449            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13450
13451  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13452  // patterns have been disabled with null_frag.
       // Unsigned quadword sources: v4i64 selects the VCVTUQQ2PHZ256 forms with a
       // VK4WM mask, v2i64 selects the VCVTUQQ2PHZ128 forms with a VK2WM mask.
       // Merge-masked patterns pass $src0 as the first operand; zero-masked
       // patterns match v8f16x_info.ImmAllZerosV.
13453  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13454            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13455  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13456                           VK4WM:$mask),
13457            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13458  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13459                           VK4WM:$mask),
13460            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13461
13462  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13463            (VCVTUQQ2PHZ256rm addr:$src)>;
13464  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13465                           VK4WM:$mask),
13466            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13467  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13468                           VK4WM:$mask),
13469            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13470
13471  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13472            (VCVTUQQ2PHZ256rmb addr:$src)>;
13473  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13474                           (v8f16 VR128X:$src0), VK4WM:$mask),
13475            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13476  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13477                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13478            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13479
13480  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13481            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13482  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13483                           VK2WM:$mask),
13484            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13485  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13486                           VK2WM:$mask),
13487            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13488
13489  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13490            (VCVTUQQ2PHZ128rm addr:$src)>;
13491  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13492                           VK2WM:$mask),
13493            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13494  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13495                           VK2WM:$mask),
13496            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13497
13498  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13499            (VCVTUQQ2PHZ128rmb addr:$src)>;
13500  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13501                           (v8f16 VR128X:$src0), VK2WM:$mask),
13502            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13503  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13504                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13505            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13506}
13507
// Packed three-source forms (register, full-vector memory, broadcast memory)
// built on AVX512_maskable_3src. The enclosing Constraints string ties $src1
// to $dst and marks $dst as earlyclobber. Every pattern hands the operands to
// OpNode in the order ($src2, $src3, $src1).
//   opc          - opcode byte forwarded to each form.
//   OpcodeStr    - mnemonic forwarded to each form.
//   OpNode       - SDNode matched by the selection patterns.
//   _            - X86VectorVTInfo supplying RC, VT, MemOp, LdFrag, etc.
//   IsCommutable - forwarded to the register form only.
13508let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13509  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    // Register form: $src2 and $src3 are both vector registers.
13510    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13511            (ins _.RC:$src2, _.RC:$src3),
13512            OpcodeStr, "$src3, $src2", "$src2, $src3",
13513            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
13514
    // Memory form: $src3 is a _.MemOp operand loaded through _.LdFrag.
13515    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13516            (ins _.RC:$src2, _.MemOp:$src3),
13517            OpcodeStr, "$src3, $src2", "$src2, $src3",
13518            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
13519
    // Broadcast form: $src3 is a _.ScalarMemOp operand loaded through
    // _.BroadcastLdFrag; _.BroadcastStr is appended to ${src3} in both operand
    // strings and EVEX_B is added to the record.
13520    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13521            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13522            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13523            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
13524  }
13525} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13526
// Register-only form of the three-source operation that carries an explicit
// AVX512RC:$rc operand. The pattern passes (i32 timm:$rc) to OpNode as a
// fourth operand after ($src2, $src3, $src1), and the record adds EVEX_B and
// EVEX_RC. It uses the same "$src1 = $dst" / earlyclobber constraint string as
// avx512_cfmaop_rm.
13527multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13528                                 X86VectorVTInfo _> {
13529  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13530  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13531          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13532          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13533          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13534          EVEX_4V, EVEX_B, EVEX_RC;
13535}
13536
13537
// Instantiates the packed three-source operation at all three vector widths.
//   Z    - v16f32_info, requires HasFP16; combines avx512_cfmaop_rm with the
//          avx512_cfmaop_round form (selected via OpNodeRnd).
//   Z256 - v8f32x_info, requires HasVLX and HasFP16; no rounding form.
//   Z128 - v4f32x_info, requires HasVLX and HasFP16; no rounding form.
// Note that the f32 vector infos are used even though the predicate is
// HasFP16 (presumably because each element is a pair of halves occupying 32
// bits -- confirm against the ISA description).
13538multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13539  let Predicates = [HasFP16] in {
13540    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13541                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13542                      EVEX_V512, Sched<[WriteFMAZ]>;
13543  }
13544  let Predicates = [HasVLX, HasFP16] in {
13545    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13546    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13547  }
13548}
13549
// Instantiates the packed two-source operation at all three vector widths by
// reusing avx512_fp_packed (plus avx512_fp_round_packed for the 512-bit Z
// variant only).
//   Z    - v16f32_info / WriteFMAZ, requires HasFP16.
//   Z256 - v8f32x_info / WriteFMAY, requires HasVLX and HasFP16.
//   Z128 - v4f32x_info / WriteFMAX, requires HasVLX and HasFP16.
// IsCommutable is forwarded to two consecutive parameters of avx512_fp_packed,
// and every instantiation passes the string "@earlyclobber $dst".
// NOTE(review): the meaning of the trailing ("", "@earlyclobber $dst", 0)
// arguments is defined by avx512_fp_packed / avx512_fp_round_packed, which are
// outside this excerpt -- check their parameter lists before changing them.
13550multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13551                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13552  let Predicates = [HasFP16] in {
13553    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13554                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13555                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13556                                       "", "@earlyclobber $dst">, EVEX_V512;
13557  }
13558  let Predicates = [HasVLX, HasFP16] in {
13559    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13560                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13561    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13562                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13563  }
13564}
13565
13566
// Packed instruction definitions; all four are declared as using MXCSR.
// Each pair shares one opcode byte (0x56 for vfmaddcph/vfcmaddcph, 0xD6 for
// vfmulcph/vfcmulcph) and differs in its map/prefix mixin: T_MAP6XS for the
// "vf*" mnemonic and T_MAP6XD for the "vfc*" mnemonic. Only the "vf*"
// variants pass IsCommutable = 1.
13567let Uses = [MXCSR] in {
13568  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13569                                    T_MAP6XS, EVEX_CD8<32, CD8VF>;
13570  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13571                                    T_MAP6XD, EVEX_CD8<32, CD8VF>;
13572
  // For the multiply forms the same node is passed as both OpNode and
  // MaskOpNode.
13573  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13574                                         x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
13575  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13576                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
13577}
13578
13579
// Scalar three-source forms on VR128X, built on AVX512_maskable_3src with
// v4f32x_info. All forms require HasFP16, tie $src1 to $dst and mark $dst as
// earlyclobber. Operands are passed to the node as ($src2, $src3, $src1).
//   r  - register form; selected via OpNode and the only form that forwards
//        IsCommutable.
//   m  - memory form; $src3 is an ssmem operand matched with sse_load_f32 and
//        scheduled as a folded load.
//   rb - register form with an AVX512RC:$rc operand; selected via OpNodeRnd
//        with (i32 timm:$rc) as the extra operand, and tagged EVEX_B, EVEX_RC.
13580multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13581                                   bit IsCommutable> {
13582  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13583    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13584                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13585                        "$src3, $src2", "$src2, $src3",
13586                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13587                        Sched<[WriteFMAX]>;
13588    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13589                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13590                        "$src3, $src2", "$src2, $src3",
13591                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13592                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13593    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13594                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13595                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13596                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13597                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13598  }
13599}
13600
// Scalar two-source forms on VR128X, built on AVX512_maskable with f32x_info.
// All forms require HasFP16, pass X86selects as the select node and pass
// "@earlyclobber $dst" as the constraint string ($dst is not tied to a source
// here, unlike the three-source multiclass).
//   rr  - register form; selected via OpNode; IsCommutable is forwarded to
//         three consecutive AVX512_maskable parameters.
//   rm  - memory form; $src2 is an ssmem operand matched with sse_load_f32,
//         scheduled as a folded load; the three commutable flags are 0.
//   rrb - register form with an AVX512RC:$rc operand; selected via OpNodeRnd
//         with (i32 timm:$rc); tagged EVEX_B, EVEX_RC; commutable flags are 0.
13601multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13602                                     SDNode OpNodeRnd, bit IsCommutable> {
13603  let Predicates = [HasFP16] in {
13604    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13605                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13606                        "$src2, $src1", "$src1, $src2",
13607                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13608                        IsCommutable, IsCommutable, IsCommutable,
13609                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13610    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13611                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13612                        "$src2, $src1", "$src1, $src2",
13613                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13614                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13615                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13616    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13617                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13618                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13619                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13620                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13621                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13622  }
13623}
13624
// Scalar instruction definitions; all four are declared as using MXCSR.
// Each pair shares one opcode byte (0x57 for vfmaddcsh/vfcmaddcsh, 0xD7 for
// vfmulcsh/vfcmulcsh) and differs in its map/prefix mixin: T_MAP6XS for the
// "vf*" mnemonic and T_MAP6XD for the "vfc*" mnemonic. Only the "vf*"
// variants pass IsCommutable = 1. All use EVEX_CD8<32, CD8VT1>, EVEX_V128 and
// EVEX_4V; the multiply forms additionally carry VEX_LIG.
13625let Uses = [MXCSR] in {
13626  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13627                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13628  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13629                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13630
13631  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13632                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13633  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13634                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13635}
13636