xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// Group template arguments that can be derived from the vector type (EltNum x
16// EltVT).  These are things like the register class for the writemask, etc.
17// The idea is to pass one of these as the template argument rather than the
18// individual arguments.
19// The template is also used for scalar types, in this case numelts is 1.
// Template parameters:
//   numelts - number of elements (1 for the scalar flavours).
//   eltvt   - element value type.
//   rc      - register class that holds a value of this type.
//   suffix  - mnemonic suffix; defaults to the empty string.
// Every other field below is derived from these four.
20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                      string suffix = ""> {
22  RegisterClass RC = rc;
23  ValueType EltVT = eltvt;
24  int NumElts = numelts;
25
26  // Corresponding mask register class.
27  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
28
29  // Corresponding mask register pair class.
  // Left unset (?) when NumElts is greater than 16.
30  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
32
33  // Corresponding write-mask register class.
34  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
35
36  // The mask VT.
37  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
38
39  // Suffix used in the instruction mnemonic.
40  string Suffix = suffix;
41
42  // VTName is a string name for vector VT. For vector types it will be
43  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44  // It is a little bit complex for scalar types, where NumElts = 1.
45  // In this case we build v4f32 or v2f64
  // A 16-bit scalar element likewise yields a v8<elt> name.  A scalar whose
  // element size is none of 16/32/64 keeps NumElts (i.e. 1) as the count.
46  string VTName = "v" # !if (!eq (NumElts, 1),
47                        !if (!eq (EltVT.Size, 16), 8,
48                        !if (!eq (EltVT.Size, 32), 4,
49                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
50
51  // The vector VT.
52  ValueType VT = !cast<ValueType>(VTName);
53
  // Element type spelled as a string, e.g. "i32" or "f64".
54  string EltTypeName = !cast<string>(EltVT);
55  // Size of the element type in bits, e.g. 32 for v16i32.
  // EltSizeName is the string form (EltTypeName with "i"/"f" removed);
  // EltSize is the integer form.
56  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
57  int EltSize = EltVT.Size;
58
59  // "i" for integer types and "f" for floating-point types
60  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
61
62  // Size of RC in bits, e.g. 512 for VR512.
  // NOTE: this is taken from VT, so for the scalar flavours (NumElts = 1) it
  // is the size of the vector type named by VTName, not of a GR32/GR64 RC.
63  int Size = VT.Size;
64
65  // The corresponding memory operand, e.g. i512mem for VR512.
66  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, named <EltVT>mem.
67  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
68  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // f16 uses shmem; the field is left unset for non-FP element types.
69  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
70                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
71                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));
72
73  // Load patterns
74  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
75
76  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
77
78  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
79  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
80
  // sse_load_f16/f32/f64 for the matching FP element type; unset otherwise.
81  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
82                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
83                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));
84
85  // The string to specify embedded broadcast in assembly.
86  string BroadcastStr = "{1to" # NumElts # "}";
87
88  // 8-bit compressed displacement tuple/subvector format.  This is only
89  // defined for NumElts <= 8.
  // The guard !srl(NumElts, 4) == 0 is true exactly when NumElts < 16.
90  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
92
  // sub_xmm for 128-bit types, sub_ymm for 256-bit types, unset for any other
  // Size.
93  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94                          !if (!eq (Size, 256), sub_ymm, ?));
95
  // Execution domain chosen from the element type; every element type other
  // than f32/f64/f16 gets SSEPackedInt.
96  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
98                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
99                     SSEPackedInt)));
100
  // FR32X for f32, FR16X for f16.  NOTE: every other element type, integer
  // types included, falls through to FR64X.
101  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
102                      !if (!eq (EltTypeName, "f16"), FR16X,
103                      FR64X));
104
  // An all-zeros value of type VT.
105  dag ImmAllZerosV = (VT immAllZerosV);
106
  // Width-dependent suffix string: "Z128", "Z256", or "Z" for any other Size.
107  string ZSuffix = !if (!eq (Size, 128), "Z128",
108                   !if (!eq (Size, 256), "Z256", "Z"));
109}
110
// 512-bit vector types: NumElts x EltVT, all held in VR512.  The last
// argument is the mnemonic suffix.
111def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
112def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
113def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
114def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
115def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
116def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
117def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
118
119// "x" in v32i8x_info means RC = VR256X
// 256-bit vector types, same suffix scheme as the 512-bit set.
120def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
121def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
122def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
123def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
124def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
125def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
126def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
127
// 128-bit vector types; here the "x" in the name goes with RC = VR128X.
128def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
129def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
130def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
131def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
132def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
133def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
134def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
135
136// We map scalar types to the smallest (128-bit) vector type
137// with the appropriate element type. This lets the same masking logic be used.
// Scalar flavours: NumElts is 1.  The integer scalars use GR32/GR64 as RC,
// the floating-point scalars use VR128X.
138def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
139def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
140def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
141def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
142def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
143
// Groups three X86VectorVTInfo records so they can be passed as a single
// template argument.  The arguments are stored unchanged as info512, info256
// and info128, in that order.
144class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
145                           X86VectorVTInfo i128> {
146  X86VectorVTInfo info512 = i512;
147  X86VectorVTInfo info256 = i256;
148  X86VectorVTInfo info128 = i128;
149}
150
// One triple per element type: the 512-bit info followed by the 256-bit and
// 128-bit "x" infos with the same element type.
151def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
152                                             v16i8x_info>;
153def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
154                                             v8i16x_info>;
155def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
156                                             v4i32x_info>;
157def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
158                                             v2i64x_info>;
159def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
160                                             v8f16x_info>;
161def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
162                                             v4f32x_info>;
163def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
164                                             v2f64x_info>;
165
// Type information for a mask (vXi1) value.  It stores the mask register
// class, the write-mask register class and the value type under the same
// field names (KRC, KRCWM, KVT) that X86VectorVTInfo uses.
166class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
167                       ValueType _vt> {
168  RegisterClass KRC = _krc;
169  RegisterClass KRCWM = _krcwm;
170  ValueType KVT = _vt;
171}
172
// One X86KVectorVTInfo per mask width N: v<N>i1_info bundles the VK<N> mask
// register class, the VK<N>WM write-mask class and the v<N>i1 value type.
foreach MaskBits = [1, 2, 4, 8, 16, 32, 64] in
  def "v" # MaskBits # "i1_info"
      : X86KVectorVTInfo<!cast<RegisterClass>("VK" # MaskBits),
                         !cast<RegisterClass>("VK" # MaskBits # "WM"),
                         !cast<ValueType>("v" # MaskBits # "i1")>;
180
181// Used for matching masked operations. Ensures the operation part only has a
182// single use.
// Wraps vselect; the match is accepted only when the C++ predicate
// isProfitableToFormMaskedOp(N) returns true.
// NOTE(review): that helper is defined outside this section; the single-use
// property described above presumably comes from it - confirm.
183def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
184                           (vselect node:$mask, node:$src1, node:$src2), [{
185  return isProfitableToFormMaskedOp(N);
186}]>;
187
// Same fragment shape and predicate as vselect_mask, but wrapping X86selects.
// It is the select operator handed to the *_scalar maskable multiclasses.
188def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
189                              (X86selects node:$mask, node:$src1, node:$src2), [{
190  return isProfitableToFormMaskedOp(N);
191}]>;
192
193// This multiclass generates the masking variants from the non-masking
194// variant.  It only provides the assembly pieces for the masking variants.
195// It assumes custom ISel patterns for masking which can be provided as
196// template arguments.
// Emits three defs: NAME (unmasked), NAME#k (EVEX_K) and NAME#kz (EVEX_KZ).
// Template parameters:
//   O, F, Outs        - forwarded unchanged to each AVX512<...> def.
//   Ins, MaskingIns, ZeroMaskingIns
//                     - input dags of NAME, NAME#k and NAME#kz respectively.
//   OpcodeStr, AttSrcAsm, IntelSrcAsm
//                     - pieces of the asm string; the AT&T and Intel source
//                       operand lists are spliced into "{att|intel}" syntax.
//   Pattern, MaskingPattern, ZeroMaskingPattern
//                     - pattern lists of the three defs.
//   MaskingConstraint - constraint for NAME#k only.
//   IsCommutable, IsKCommutable, IsKZCommutable
//                     - isCommutable of NAME, NAME#k and NAME#kz;
//                       IsKZCommutable defaults to IsCommutable.
//   ClobberConstraint - Constraints of NAME and NAME#kz; merged into the
//                       Constraints of NAME#k.
197multiclass AVX512_maskable_custom<bits<8> O, Format F,
198                                  dag Outs,
199                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
200                                  string OpcodeStr,
201                                  string AttSrcAsm, string IntelSrcAsm,
202                                  list<dag> Pattern,
203                                  list<dag> MaskingPattern,
204                                  list<dag> ZeroMaskingPattern,
205                                  string MaskingConstraint = "",
206                                  bit IsCommutable = 0,
207                                  bit IsKCommutable = 0,
208                                  bit IsKZCommutable = IsCommutable,
209                                  string ClobberConstraint = ""> {
  // Unmasked form.
210  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
211    def NAME: AVX512<O, F, Outs, Ins,
212                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
213                                     "$dst, "#IntelSrcAsm#"}",
214                       Pattern>;
215
216  // Prefer over VMOV*rrk Pat<>
  // Merge-masked form; "$dst {${mask}}" is appended in both syntaxes.
217  let isCommutable = IsKCommutable in
218    def NAME#k: AVX512<O, F, Outs, MaskingIns,
219                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
220                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
221                       MaskingPattern>,
222              EVEX_K {
223      // In case of the 3src subclass this is overridden with a let.
      // Use whichever of the two constraints is non-empty, or both joined
      // with ", " when neither is empty.
224      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
225                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
226                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
227    }
228
229  // Zero mask does not add any restrictions to commute operands transformation.
230  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  // The flag actually used is IsKZCommutable, which defaults to IsCommutable.
231  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
232      Constraints = ClobberConstraint in
233    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
234                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
235                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
236                       ZeroMaskingPattern>,
237              EVEX_KZ;
238}
239
240
241// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists for AVX512_maskable_custom from dags:
//   unmasked     : (set _.RC:$dst, RHS)
//   masked       : (set _.RC:$dst, MaskingRHS)
//   zero-masked  : (set _.RC:$dst, (Select mask, RHS, all-zeros))
// Select defaults to vselect_mask.  All remaining arguments are forwarded
// unchanged.
242multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
243                                  dag Outs,
244                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
245                                  string OpcodeStr,
246                                  string AttSrcAsm, string IntelSrcAsm,
247                                  dag RHS, dag MaskingRHS,
248                                  SDPatternOperator Select = vselect_mask,
249                                  string MaskingConstraint = "",
250                                  bit IsCommutable = 0,
251                                  bit IsKCommutable = 0,
252                                  bit IsKZCommutable = IsCommutable,
253                                  string ClobberConstraint = ""> :
254  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
255                         AttSrcAsm, IntelSrcAsm,
256                         [(set _.RC:$dst, RHS)],
257                         [(set _.RC:$dst, MaskingRHS)],
258                         [(set _.RC:$dst,
259                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
260                         MaskingConstraint, IsCommutable,
261                         IsKCommutable, IsKZCommutable, ClobberConstraint>;
262
263// This multiclass generates the unconditional/non-masking, the masking and
264// the zero-masking variant of the vector instruction.  In the masking case, the
265// preserved vector elements come from a new dummy input operand tied to $dst.
266// This version uses a separate dag for non-masking and masking.
// RHS drives the unmasked pattern.  MaskRHS is wrapped in vselect_mask for
// both masked forms, with $src0 as pass-through when merging and all-zeros
// when zeroing.  The masked inputs are Ins prefixed with ($src0, $mask); the
// zero-masked inputs are Ins prefixed with ($mask).  "$src0 = $dst" is passed
// as the masking constraint.
// Note that ClobberConstraint precedes the commutable flags in this
// parameter list, unlike in AVX512_maskable.
267multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
268                           dag Outs, dag Ins, string OpcodeStr,
269                           string AttSrcAsm, string IntelSrcAsm,
270                           dag RHS, dag MaskRHS,
271                           string ClobberConstraint = "",
272                           bit IsCommutable = 0, bit IsKCommutable = 0,
273                           bit IsKZCommutable = IsCommutable> :
274   AVX512_maskable_custom<O, F, Outs, Ins,
275                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
276                          !con((ins _.KRCWM:$mask), Ins),
277                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
278                          [(set _.RC:$dst, RHS)],
279                          [(set _.RC:$dst,
280                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
281                          [(set _.RC:$dst,
282                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
283                          "$src0 = $dst", IsCommutable, IsKCommutable,
284                          IsKZCommutable, ClobberConstraint>;
285
286// This multiclass generates the unconditional/non-masking, the masking and
287// the zero-masking variant of the vector instruction.  In the masking case, the
288// preserved vector elements come from a new dummy input operand tied to $dst.
// The same RHS dag is used for all three forms.  The merge-masked pattern is
// (Select mask, RHS, $src0), the zero-masked pattern is produced by
// AVX512_maskable_common, and "$src0 = $dst" is passed as the masking
// constraint.  Masked inputs are Ins prefixed with ($src0, $mask); zero-masked
// inputs are Ins prefixed with ($mask).
289multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
290                           dag Outs, dag Ins, string OpcodeStr,
291                           string AttSrcAsm, string IntelSrcAsm,
292                           dag RHS,
293                           bit IsCommutable = 0, bit IsKCommutable = 0,
294                           bit IsKZCommutable = IsCommutable,
295                           SDPatternOperator Select = vselect_mask,
296                           string ClobberConstraint = ""> :
297   AVX512_maskable_common<O, F, _, Outs, Ins,
298                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
299                          !con((ins _.KRCWM:$mask), Ins),
300                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
301                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
302                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
303                          IsKZCommutable, ClobberConstraint>;
304
305// This multiclass generates the unconditional/non-masking, the masking and
306// the zero-masking variant of the scalar instruction.
// Forwards to AVX512_maskable with all three commutable flags fixed to 0 and
// X86selects_mask as the select operator.
307multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
308                           dag Outs, dag Ins, string OpcodeStr,
309                           string AttSrcAsm, string IntelSrcAsm,
310                           dag RHS> :
311   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
312                   RHS, 0, 0, 0, X86selects_mask>;
313
314// Similar to AVX512_maskable but in this case one of the source operands
315// ($src1) is already tied to $dst so we just use that for the preserved
316// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
317// $src1.
// $src1 is prepended to NonTiedIns for every form; the masked and zero-masked
// forms share the same input dag ($src1, $mask, NonTiedIns...).
// MaskOnly = 1 replaces the RHS handed to AVX512_maskable_common with
// (null_frag); the merge-masked pattern still uses the real RHS.
// The masking constraint passed down is "".
// NOTE(review): the $src1/$dst tie is presumably supplied by a let at the use
// site - confirm.
318multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
319                                dag Outs, dag NonTiedIns, string OpcodeStr,
320                                string AttSrcAsm, string IntelSrcAsm,
321                                dag RHS,
322                                bit IsCommutable = 0,
323                                bit IsKCommutable = 0,
324                                SDPatternOperator Select = vselect_mask,
325                                bit MaskOnly = 0> :
326   AVX512_maskable_common<O, F, _, Outs,
327                          !con((ins _.RC:$src1), NonTiedIns),
328                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
329                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
330                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
331                          !if(MaskOnly, (null_frag), RHS),
332                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
333                          Select, "", IsCommutable, IsKCommutable>;
334
335// Similar to AVX512_maskable_3src but in this case the input VT for the tied
336// operand differs from the output VT. This requires a bitconvert on
337// the preserved vector going into the vselect.
338// NOTE: The unmasked pattern is disabled.
// OutVT describes the result; InVT describes the tied $src1 operand and also
// supplies the write-mask register class.  (null_frag) is passed as RHS, and
// the merge-masked pattern selects between RHS and (bitconvert $src1) with
// vselect_mask.
339multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
340                                     X86VectorVTInfo InVT,
341                                     dag Outs, dag NonTiedIns, string OpcodeStr,
342                                     string AttSrcAsm, string IntelSrcAsm,
343                                     dag RHS, bit IsCommutable = 0> :
344   AVX512_maskable_common<O, F, OutVT, Outs,
345                          !con((ins InVT.RC:$src1), NonTiedIns),
346                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
347                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
348                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
349                          (vselect_mask InVT.KRCWM:$mask, RHS,
350                           (bitconvert InVT.RC:$src1)),
351                           vselect_mask, "", IsCommutable>;
352
// Scalar counterpart of AVX512_maskable_3src: identical forwarding, except
// that X86selects_mask is used as the select operator.
353multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
354                                     dag Outs, dag NonTiedIns, string OpcodeStr,
355                                     string AttSrcAsm, string IntelSrcAsm,
356                                     dag RHS,
357                                     bit IsCommutable = 0,
358                                     bit IsKCommutable = 0,
359                                     bit MaskOnly = 0> :
360   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
361                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
362                        X86selects_mask, MaskOnly>;
363
// Only the unmasked def receives a pattern list (Pattern); the masked and
// zero-masked defs get empty pattern lists.  Input dags and the
// "$src0 = $dst" constraint follow the same scheme as AVX512_maskable.
364multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
365                                  dag Outs, dag Ins,
366                                  string OpcodeStr,
367                                  string AttSrcAsm, string IntelSrcAsm,
368                                  list<dag> Pattern> :
369   AVX512_maskable_custom<O, F, Outs, Ins,
370                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
371                          !con((ins _.KRCWM:$mask), Ins),
372                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
373                          "$src0 = $dst">;
374
// 3-source variant of AVX512_maskable_in_asm: $src1 is prepended to
// NonTiedIns for every form, the masked and zero-masked pattern lists are
// empty, and the masking constraint passed down is "".
375multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
376                                       dag Outs, dag NonTiedIns,
377                                       string OpcodeStr,
378                                       string AttSrcAsm, string IntelSrcAsm,
379                                       list<dag> Pattern> :
380   AVX512_maskable_custom<O, F, Outs,
381                          !con((ins _.RC:$src1), NonTiedIns),
382                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
383                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
384                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
385                          "">;
386
387// Instruction with mask that puts result in mask register,
388// like "compare" and "vptest"
// Emits two defs, NAME (unmasked) and NAME#k (EVEX_K); there is no
// zero-masking form.  Both defs take their isCommutable value from
// IsCommutable.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  // Unmasked form: "<op>\t{<att>, $dst|$dst, <intel>}".
  def NAME : AVX512<O, F, Outs, Ins,
                    !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                               ", $dst|$dst, ",
                               IntelSrcAsm, "}"),
                    Pattern> {
    let isCommutable = IsCommutable;
  }

  // Masked form: the destination is followed by " {${mask}}" in both syntaxes.
  def NAME#k : AVX512<O, F, Outs, MaskingIns,
                      !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                                 ", $dst {${mask}}|$dst {${mask}}, ",
                                 IntelSrcAsm, "}"),
                      MaskingPattern>, EVEX_K {
    let isCommutable = IsCommutable;
  }
}
409
// Wraps RHS and MaskingRHS into single-element pattern lists that set
// _.KRC:$dst (the mask register class of the vector type) and forwards to
// AVX512_maskable_custom_cmp.
410multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
411                                  dag Outs,
412                                  dag Ins, dag MaskingIns,
413                                  string OpcodeStr,
414                                  string AttSrcAsm, string IntelSrcAsm,
415                                  dag RHS, dag MaskingRHS,
416                                  bit IsCommutable = 0> :
417  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
418                         AttSrcAsm, IntelSrcAsm,
419                         [(set _.KRC:$dst, RHS)],
420                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
421
// The masked inputs are Ins prefixed with the write-mask operand.  The masked
// pattern is (and $mask, RHS_su); RHS is used for the unmasked pattern only.
// NOTE(review): RHS_su is presumably the single-use flavour of RHS - confirm
// at the call sites.
422multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
423                           dag Outs, dag Ins, string OpcodeStr,
424                           string AttSrcAsm, string IntelSrcAsm,
425                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
426   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
427                          !con((ins _.KRCWM:$mask), Ins),
428                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
429                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
430
431// Used by conversion instructions.
// The caller supplies all three input dags and all three pattern right-hand
// sides explicitly; each RHS is wrapped as (set _.RC:$dst, ...).
// "$src0 = $dst" is passed as the masking constraint and the commutable flags
// keep their defaults.
432multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
433                                  dag Outs,
434                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
435                                  string OpcodeStr,
436                                  string AttSrcAsm, string IntelSrcAsm,
437                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
438  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
439                         AttSrcAsm, IntelSrcAsm,
440                         [(set _.RC:$dst, RHS)],
441                         [(set _.RC:$dst, MaskingRHS)],
442                         [(set _.RC:$dst, ZeroMaskingRHS)],
443                         "$src0 = $dst">;
444
// 3-source helper with separate unmasked (RHS) and masked (MaskingRHS) dags.
// $src1 is prepended to NonTiedIns for every form.  Both masked patterns wrap
// MaskingRHS in vselect_mask, with $src1 as pass-through when merging and
// all-zeros when zeroing.  IsCommutable and IsKCommutable have no defaults
// here, and the masking constraint passed down is "".
445multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
446                               dag Outs, dag NonTiedIns, string OpcodeStr,
447                               string AttSrcAsm, string IntelSrcAsm,
448                               dag RHS, dag MaskingRHS, bit IsCommutable,
449                               bit IsKCommutable> :
450   AVX512_maskable_custom<O, F, Outs,
451                          !con((ins _.RC:$src1), NonTiedIns),
452                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
453                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
454                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
455                          [(set _.RC:$dst, RHS)],
456                          [(set _.RC:$dst,
457                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
458                          [(set _.RC:$dst,
459                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
460                          "", IsCommutable, IsKCommutable>;
461
462// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
463// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
464// swizzled by ExecutionDomainFix to pxor.
465// We set canFoldAsLoad because this can be converted to a constant-pool
466// load of an all-zeros value if folding it would be beneficial.
// Two operand-less VR512 pseudos with an empty asm string: one matching a
// v16i32 all-zeros vector, one matching a v16i32 all-ones vector.  Both share
// the same flags, the HasAVX512 predicate and the WriteZero scheduling class.
467let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
468    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
469def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
470               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
471def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
472               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
473}
474
// AVX512_512_SET0 itself matches the v16i32 all-zeros vector; route the
// all-zeros constant of every other 512-bit vector type to the same pseudo.
let Predicates = [HasAVX512] in
foreach ZeroVT = [v64i8, v32i16, v8i64, v32f16, v16f32, v8f64] in
  def : Pat<(ZeroVT immAllZerosV), (AVX512_512_SET0)>;
483
484// Alias instructions that allow VPTERNLOG to be used with a mask to create
485// a mix of all ones and all zeros elements. This is done this way to force
486// the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask operand and matches a vselect between
// an all-ones and an all-zeros vector: the _32 form uses a 16-bit mask with
// v16i32, the _64 form an 8-bit mask with v8i64.
487let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
488def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
489                                (ins VK16WM:$mask), "",
490                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
491                                                      (v16i32 immAllOnesV),
492                                                      (v16i32 immAllZerosV)))]>;
493def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
494                                (ins VK8WM:$mask), "",
495                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
496                                           (v8i64 immAllOnesV),
497                                           (v8i64 immAllZerosV)))]>;
498}
499
// 128-bit (VR128X, v4i32) and 256-bit (VR256X, v8i32) all-zeros pseudos,
// carrying the same flags, predicate and scheduling class as AVX512_512_SET0.
500let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
501    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
502def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
503               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
504def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
505               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
506}
507
// The 128/256-bit SET0 pseudos match v4i32 and v8i32 themselves; map the
// all-zeros constant of the remaining vector types of each width onto them.
let Predicates = [HasAVX512] in {
  foreach ZeroVT = [v8i16, v16i8, v2i64, v8f16, v4f32, v2f64] in
    def : Pat<(ZeroVT immAllZerosV), (AVX512_128_SET0)>;

  foreach ZeroVT = [v32i8, v16i16, v4i64, v16f16, v8f32, v4f64] in
    def : Pat<(ZeroVT immAllZerosV), (AVX512_256_SET0)>;
}
522
523// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
524// This is expanded by ExpandPostRAPseudos.
// One operand-less pseudo per scalar FP width, each matching the zero
// immediate of that width: f16 in FR16X, f32 in FR32X, f64 in FR64X, and
// f128 in VR128X.
525let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
526    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
527  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
528                          [(set FR16X:$dst, fp16imm0)]>;
529  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
530                          [(set FR32X:$dst, fp32imm0)]>;
531  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
532                          [(set FR64X:$dst, fp64imm0)]>;
533  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
534                            [(set VR128X:$dst, fp128imm0)]>;
535}
536
537//===----------------------------------------------------------------------===//
538// AVX-512 - VECTOR INSERT
539//
540
541// Supports two different pattern operators for mask and unmasked ops. Allows
542// null_frag to be passed for one.
// Template parameters:
//   Opcode           - opcode forwarded to AVX512_maskable_split.
//   From             - type info of the inserted subvector ($src2).
//   To               - type info of the destination vector ($src1 / $dst).
//   vinsert_insert   - operator used in the unmasked pattern.
//   vinsert_for_mask - operator used in the masked patterns.
//   sched            - scheduling info; the memory form uses sched.Folded and
//                      sched.ReadAfterFold.
// Emits a register form (rr) and a memory form (rm).  The mnemonic is
// "vinsert" # <From element type> # "x" # <From element count>; for example
// From = 4 x f32 gives "vinsertf32x4".
543multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
544                                  X86VectorVTInfo To,
545                                  SDPatternOperator vinsert_insert,
546                                  SDPatternOperator vinsert_for_mask,
547                                  X86FoldableSchedWrite sched> {
548  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
549    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
550                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
551                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
552                   "$src3, $src2, $src1", "$src1, $src2, $src3",
553                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
554                                         (From.VT From.RC:$src2),
555                                         (iPTR imm)),
556                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
557                                           (From.VT From.RC:$src2),
558                                           (iPTR imm))>,
559                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: $src2 is loaded through From.LdFrag, and the compressed
    // displacement is described by From.EltSize and From.CD8TupleForm.
560    let mayLoad = 1 in
561    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
562                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
563                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
564                   "$src3, $src2, $src1", "$src1, $src2, $src3",
565                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
566                               (From.VT (From.LdFrag addr:$src2)),
567                               (iPTR imm)),
568                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
569                               (From.VT (From.LdFrag addr:$src2)),
570                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
571                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
572                   Sched<[sched.Folded, sched.ReadAfterFold]>;
573  }
574}
575
576// Passes the same pattern operator for masked and unmasked ops.
// Forwards to vinsert_for_size_split, passing vinsert_insert for both the
// unmasked and the masked operator.
577multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
578                            X86VectorVTInfo To,
579                            SDPatternOperator vinsert_insert,
580                            X86FoldableSchedWrite sched> :
581  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
582
// Selection-only patterns: map vinsert_insert on the From/To type pair onto
// the instructions named InstrStr # "rr" and InstrStr # "rm". Only anonymous
// Pat records are created here; no instruction is defined.
//   InstrStr               - base name of the instruction to select.
//   From / To              - inserted subvector type / full vector type.
//   vinsert_insert         - fragment to match; the matched node is bound to
//                            $ins.
//   INSERT_get_vinsert_imm - transform applied to $ins to produce the
//                            immediate operand of the selected instruction.
//   p                      - predicates guarding both patterns.
583multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
584                       X86VectorVTInfo To, PatFrag vinsert_insert,
585                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
586  let Predicates = p in {
    // Subvector comes from a register.
587    def : Pat<(vinsert_insert:$ins
588                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
589              (To.VT (!cast<Instruction>(InstrStr#"rr")
590                     To.RC:$src1, From.RC:$src2,
591                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
592
    // Subvector comes from a load (From.LdFrag); select the "rm" form with the
    // address as the second source.
593    def : Pat<(vinsert_insert:$ins
594                  (To.VT To.RC:$src1),
595                  (From.VT (From.LdFrag addr:$src2)),
596                  (iPTR imm)),
597              (To.VT (!cast<Instruction>(InstrStr#"rm")
598                  To.RC:$src1, addr:$src2,
599                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
600  }
601}
602
// Instantiates the subvector-insert instruction family for one element kind.
//   EltVT32 / EltVT64     - element value types used to build the
//                           X86VectorVTInfo arguments below (instantiated
//                           with f32/f64 and i32/i64 in this file).
//   Opcode128 / Opcode256 - opcode used when the inserted subvector is in
//                           VR128X / VR256X respectively.
//   sched                 - scheduling info forwarded to every variant.
// Record names are NAME # "<elt bits>x<subvector elts>Z", with a "256" suffix
// for the variants whose destination is VR256X.
603multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
604                            ValueType EltVT64, int Opcode256,
605                            X86FoldableSchedWrite sched> {
606
  // 4 x EltVT32 (VR128X) into 8 x EltVT32 (VR256X); guarded by HasVLX.
607  let Predicates = [HasVLX] in
608    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
609                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
610                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
611                                 vinsert128_insert, sched>, EVEX_V256;
612
  // 4 x EltVT32 (VR128X) into 16 x EltVT32 (VR512).
  // NOTE(review): no Predicates are set in this multiclass for this variant
  // or the next one; presumably they come from a base class - confirm.
613  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
614                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
615                                 X86VectorVTInfo<16, EltVT32, VR512>,
616                                 vinsert128_insert, sched>, EVEX_V512;
617
  // 4 x EltVT64 (VR256X) into 8 x EltVT64 (VR512).
618  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
619                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
620                                 X86VectorVTInfo< 8, EltVT64, VR512>,
621                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;
622
623  // Even with DQI we'd like to only use these instructions for masking.
  // null_frag is passed as the first pattern operator and vinsert128_insert
  // as the second.
624  let Predicates = [HasVLX, HasDQI] in
625    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
626                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
627                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
628                                   null_frag, vinsert128_insert, sched>,
629                                   VEX_W1X, EVEX_V256;
630
631  // Even with DQI we'd like to only use these instructions for masking.
  // Same null_frag arrangement as above, for the VR512 destinations.
632  let Predicates = [HasDQI] in {
633    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
634                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
635                                 X86VectorVTInfo< 8, EltVT64, VR512>,
636                                 null_frag, vinsert128_insert, sched>,
637                                 VEX_W, EVEX_V512;
638
639    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
640                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
641                                   X86VectorVTInfo<16, EltVT32, VR512>,
642                                   null_frag, vinsert256_insert, sched>,
643                                   EVEX_V512;
644  }
645}
646
// Instantiate the insert families: VINSERTF with f32/f64 elements and
// opcodes 0x18/0x1a, VINSERTI with i32/i64 elements and opcodes 0x38/0x3a.
647// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
648defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
649defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
650
651// Codegen patterns with the alternative types.
652// Even with AVX512DQ we'll still use these for unmasked operations.
// Each entry maps an insert on the listed From/To types onto the 32x4 or 64x4
// instruction of matching vector width.
// 64-bit element types, 128-bit subvector into a 256-bit vector.
653defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
654              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
655defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
656              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
657
// 64-bit element types, 128-bit subvector into a 512-bit vector.
658defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
659              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
660defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
661              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
662
// 32-bit element types, 256-bit subvector into a 512-bit vector.
663defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
664              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
665defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
666              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
667
668// Codegen patterns with the alternative types: insert VEC128 into VEC256
// (i16, i8 and f16 elements).
669defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
670              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
671defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
672              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
673defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
674              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
675// Codegen patterns with the alternative types: insert VEC128 into VEC512.
676defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
677              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
678defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
679               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
680defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
681              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
682// Codegen patterns with the alternative types: insert VEC256 into VEC512.
683defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
684              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
685defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
686              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
687defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
688              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
689
690
// Additional patterns for handling a bitcast between the vselect and the
// subvector insert. The insert is matched on the From/To types, its result is
// bitconverted to Cast.VT, and the vselect_mask (mask class Cast.KRCWM) picks
// between that and either a pass-through register or all-zeros.
//   InstrStr               - base instruction name; the "rrk", "rmk", "rrkz"
//                            and "rmkz" records of it are selected.
//   From / To              - types the insert itself is performed on.
//   Cast                   - type of the vselect_mask result and pass-through.
//   vinsert_insert         - fragment to match; the matched node is bound to
//                            $ins.
//   INSERT_get_vinsert_imm - transform applied to $ins to produce the
//                            immediate operand.
//   p                      - predicates guarding all four patterns.
691multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
692                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
693                                 PatFrag vinsert_insert,
694                                 SDNodeXForm INSERT_get_vinsert_imm,
695                                 list<Predicate> p> {
696let Predicates = p in {
  // Merge-masking, register source: pass-through is Cast.RC:$src0.
697  def : Pat<(Cast.VT
698             (vselect_mask Cast.KRCWM:$mask,
699                           (bitconvert
700                            (vinsert_insert:$ins (To.VT To.RC:$src1),
701                                                 (From.VT From.RC:$src2),
702                                                 (iPTR imm))),
703                           Cast.RC:$src0)),
704            (!cast<Instruction>(InstrStr#"rrk")
705             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
706             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, memory source.
  // NOTE(review): this pattern wraps the load in an extra bitconvert, while
  // the zero-masking memory pattern below matches the load directly; confirm
  // the difference is intentional.
707  def : Pat<(Cast.VT
708             (vselect_mask Cast.KRCWM:$mask,
709                           (bitconvert
710                            (vinsert_insert:$ins (To.VT To.RC:$src1),
711                                                 (From.VT
712                                                  (bitconvert
713                                                   (From.LdFrag addr:$src2))),
714                                                 (iPTR imm))),
715                           Cast.RC:$src0)),
716            (!cast<Instruction>(InstrStr#"rmk")
717             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
718             (INSERT_get_vinsert_imm To.RC:$ins))>;
719
  // Zero-masking, register source: the vselect's false operand is
  // Cast.ImmAllZerosV, so no pass-through register is needed.
720  def : Pat<(Cast.VT
721             (vselect_mask Cast.KRCWM:$mask,
722                           (bitconvert
723                            (vinsert_insert:$ins (To.VT To.RC:$src1),
724                                                 (From.VT From.RC:$src2),
725                                                 (iPTR imm))),
726                           Cast.ImmAllZerosV)),
727            (!cast<Instruction>(InstrStr#"rrkz")
728             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
729             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, memory source.
730  def : Pat<(Cast.VT
731             (vselect_mask Cast.KRCWM:$mask,
732                           (bitconvert
733                            (vinsert_insert:$ins (To.VT To.RC:$src1),
734                                                 (From.VT (From.LdFrag addr:$src2)),
735                                                 (iPTR imm))),
736                           Cast.ImmAllZerosV)),
737            (!cast<Instruction>(InstrStr#"rmkz")
738             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
739             (INSERT_get_vinsert_imm To.RC:$ins))>;
740}
741}
742
// Masked-insert patterns where the insert is done on one type and the
// vselect_mask on another (third argument = Cast type). The instruction named
// in each entry has the element width of the Cast type, because that is the
// granularity the mask applies at.
//
// 128-bit subvector into a 256-bit vector, floating point.
743defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
744                             v8f32x_info, vinsert128_insert,
745                             INSERT_get_vinsert128_imm, [HasVLX]>;
746defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
747                             v4f64x_info, vinsert128_insert,
748                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
749
// 128-bit subvector into a 256-bit vector, integer.
750defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
751                             v8i32x_info, vinsert128_insert,
752                             INSERT_get_vinsert128_imm, [HasVLX]>;
753defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
754                             v8i32x_info, vinsert128_insert,
755                             INSERT_get_vinsert128_imm, [HasVLX]>;
756defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
757                             v8i32x_info, vinsert128_insert,
758                             INSERT_get_vinsert128_imm, [HasVLX]>;
// The three entries below cast to v4i64, so they select the integer
// VINSERTI64x2Z256 form, matching the 512-bit VINSERTI64x2Z entries further
// down. They previously named the floating-point VINSERTF64x2Z256.
758defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
760                             v4i64x_info, vinsert128_insert,
761                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
762defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
763                             v4i64x_info, vinsert128_insert,
764                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
765defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
766                             v4i64x_info, vinsert128_insert,
767                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
768
// 128-bit subvector into a 512-bit vector, floating point.
769defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
770                             v16f32_info, vinsert128_insert,
771                             INSERT_get_vinsert128_imm, [HasAVX512]>;
772defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
773                             v8f64_info, vinsert128_insert,
774                             INSERT_get_vinsert128_imm, [HasDQI]>;
775
// 128-bit subvector into a 512-bit vector, integer.
776defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
777                             v16i32_info, vinsert128_insert,
778                             INSERT_get_vinsert128_imm, [HasAVX512]>;
779defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
780                             v16i32_info, vinsert128_insert,
781                             INSERT_get_vinsert128_imm, [HasAVX512]>;
782defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
783                             v16i32_info, vinsert128_insert,
784                             INSERT_get_vinsert128_imm, [HasAVX512]>;
785defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
786                             v8i64_info, vinsert128_insert,
787                             INSERT_get_vinsert128_imm, [HasDQI]>;
788defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
789                             v8i64_info, vinsert128_insert,
790                             INSERT_get_vinsert128_imm, [HasDQI]>;
791defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
792                             v8i64_info, vinsert128_insert,
793                             INSERT_get_vinsert128_imm, [HasDQI]>;
794
// 256-bit subvector into a 512-bit vector, floating point.
795defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
796                             v16f32_info, vinsert256_insert,
797                             INSERT_get_vinsert256_imm, [HasDQI]>;
798defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
799                             v8f64_info, vinsert256_insert,
800                             INSERT_get_vinsert256_imm, [HasAVX512]>;
801
// 256-bit subvector into a 512-bit vector, integer.
802defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
803                             v16i32_info, vinsert256_insert,
804                             INSERT_get_vinsert256_imm, [HasDQI]>;
805defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
806                             v16i32_info, vinsert256_insert,
807                             INSERT_get_vinsert256_imm, [HasDQI]>;
808defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
809                             v16i32_info, vinsert256_insert,
810                             INSERT_get_vinsert256_imm, [HasDQI]>;
811defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
812                             v8i64_info, vinsert256_insert,
813                             INSERT_get_vinsert256_imm, [HasAVX512]>;
814defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
815                             v8i64_info, vinsert256_insert,
816                             INSERT_get_vinsert256_imm, [HasAVX512]>;
817defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
818                             v8i64_info, vinsert256_insert,
819                             INSERT_get_vinsert256_imm, [HasAVX512]>;
820
821// vinsertps - insert f32 to XMM
// Opcode 0x21 with an 8-bit immediate ($src3). Both forms write VR128X:$dst
// and are selected from X86insertps; both are in the SSEPackedSingle domain.
822let ExeDomain = SSEPackedSingle in {
// Register-register form; only this form is marked commutable.
823let isCommutable = 1 in
824def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
825      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
826      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
827      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
828      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Register-memory form: the second source is a scalar f32 load (f32mem) that
// the pattern wraps in scalar_to_vector to form a v4f32.
829def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
830      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
831      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
832      [(set VR128X:$dst, (X86insertps VR128X:$src1,
833                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
834                          timm:$src3))]>,
835      EVEX_4V, EVEX_CD8<32, CD8VT1>,
836      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
837}
838
839//===----------------------------------------------------------------------===//
840// AVX-512 VECTOR EXTRACT
841//---
842
843// Supports two different pattern operators for mask and unmasked ops. Allows
844// null_frag to be passed for one.
// Defines three records per instantiation: "rr" (register destination, via
// AVX512_maskable_split), "mr" (store to memory) and "mrk" (masked store to
// memory, no selection pattern).
//   Opcode            - instruction opcode shared by all three records.
//   From / To         - source vector type / extracted subvector type.
//   vextract_extract  - operator used in the first "rr" pattern and in the
//                       "mr" store pattern.
//   vextract_for_mask - operator used in the second "rr" pattern.
//   SchedRR / SchedMR - scheduling classes for the register and memory
//                       destination forms.
845multiclass vextract_for_size_split<int Opcode,
846                                   X86VectorVTInfo From, X86VectorVTInfo To,
847                                   SDPatternOperator vextract_extract,
848                                   SDPatternOperator vextract_for_mask,
849                                   SchedWrite SchedRR, SchedWrite SchedMR> {
850
851  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination. The mnemonic is built from the destination
    // element type name and element count.
852    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
853                (ins From.RC:$src1, u8imm:$idx),
854                "vextract" # To.EltTypeName # "x" # To.NumElts,
855                "$idx, $src1", "$src1, $idx",
856                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
857                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
858                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
859
    // Unmasked store of the extracted subvector to $dst.
860    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
861                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
862                    "vextract" # To.EltTypeName # "x" # To.NumElts #
863                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
864                    [(store (To.VT (vextract_extract:$idx
865                                    (From.VT From.RC:$src1), (iPTR imm))),
866                             addr:$dst)]>, EVEX,
867                    Sched<[SchedMR]>;
868
    // Masked store. The pattern list is empty, so mayStore is set explicitly
    // here; the record is also tagged NotMemoryFoldable.
869    let mayStore = 1, hasSideEffects = 0 in
870    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
871                    (ins To.MemOp:$dst, To.KRCWM:$mask,
872                                        From.RC:$src1, u8imm:$idx),
873                     "vextract" # To.EltTypeName # "x" # To.NumElts #
874                          "\t{$idx, $src1, $dst {${mask}}|"
875                          "$dst {${mask}}, $src1, $idx}", []>,
876                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
877  }
878}
879
880// Passes the same pattern operator for masked and unmasked ops.
// Forwards every argument to vextract_for_size_split unchanged, supplying
// vextract_extract for both of its pattern-operator parameters.
881multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
882                             X86VectorVTInfo To,
883                             SDPatternOperator vextract_extract,
884                             SchedWrite SchedRR, SchedWrite SchedMR> :
885  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
886
887// Codegen pattern for the alternative types
// Selection-only patterns: map vextract_extract on the From/To type pair onto
// the instructions named InstrStr # "rr" and InstrStr # "mr". Only anonymous
// Pat records are created here; no instruction is defined.
//   InstrStr                 - base name of the instruction to select.
//   From / To                - source vector type / extracted subvector type.
//   vextract_extract         - fragment to match; the matched node is bound
//                              to $ext.
//   EXTRACT_get_vextract_imm - transform applied to $ext to produce the
//                              immediate operand.
//   p                        - predicates guarding both patterns.
888multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
889                X86VectorVTInfo To, PatFrag vextract_extract,
890                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
891  let Predicates = p in {
     // Extract into a register.
892     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
893               (To.VT (!cast<Instruction>(InstrStr#"rr")
894                          From.RC:$src1,
895                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract whose result is stored: fold the store into the "mr" form.
896     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
897                              (iPTR imm))), addr:$dst),
898               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
899                (EXTRACT_get_vextract_imm To.RC:$ext))>;
900  }
901}
902
// Instantiates the subvector-extract instruction family for one element kind.
//   EltVT32 / EltVT64     - element value types used to build the
//                           X86VectorVTInfo arguments below (instantiated
//                           with f32/f64 and i32/i64 in this file).
//   Opcode128 / Opcode256 - opcode used when the extracted subvector is in
//                           VR128X / VR256X respectively.
//   SchedRR / SchedMR     - scheduling classes forwarded to every variant.
// Record names are NAME # "<elt bits>x<subvector elts>Z", with a "256" suffix
// for the variants whose source is VR256X. Every variant also supplies its own
// EVEX_CD8 setting.
903multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
904                             ValueType EltVT64, int Opcode256,
905                             SchedWrite SchedRR, SchedWrite SchedMR> {
  // Extracts from a 512-bit source that need only AVX512:
  // 4 x EltVT32 (VR128X) from 16 x EltVT32, and 4 x EltVT64 (VR256X) from
  // 8 x EltVT64.
906  let Predicates = [HasAVX512] in {
907    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
908                                   X86VectorVTInfo<16, EltVT32, VR512>,
909                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
910                                   vextract128_extract, SchedRR, SchedMR>,
911                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
912    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
913                                   X86VectorVTInfo< 8, EltVT64, VR512>,
914                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
915                                   vextract256_extract, SchedRR, SchedMR>,
916                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
917  }
  // 4 x EltVT32 (VR128X) from 8 x EltVT32 (VR256X); guarded by HasVLX.
918  let Predicates = [HasVLX] in
919    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
920                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
921                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
922                                 vextract128_extract, SchedRR, SchedMR>,
923                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;
924
925  // Even with DQI we'd like to only use these instructions for masking.
  // null_frag is passed as the first pattern operator and vextract128_extract
  // as the second.
926  let Predicates = [HasVLX, HasDQI] in
927    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
928                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
929                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
930                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
931                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
932
933  // Even with DQI we'd like to only use these instructions for masking.
  // Same null_frag arrangement as above, for the VR512 sources.
934  let Predicates = [HasDQI] in {
935    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
936                                 X86VectorVTInfo< 8, EltVT64, VR512>,
937                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
938                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
939                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
940    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
941                                 X86VectorVTInfo<16, EltVT32, VR512>,
942                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
943                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
944                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
945  }
946}
947
// Instantiate the extract families: VEXTRACTF with f32/f64 elements and
// opcodes 0x19/0x1b, VEXTRACTI with i32/i64 elements and opcodes 0x39/0x3b.
948// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
949defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
950defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
951
952// extract_subvector codegen patterns with the alternative types.
953// Even with AVX512DQ we'll still use these for unmasked operations.
// Each entry maps an extract on the listed From/To types onto the 32x4 or
// 64x4 instruction of matching vector width.
// 64-bit element types, 128-bit subvector from a 512-bit vector.
954defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
955          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
956defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
957          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
958
// 32-bit element types, 256-bit subvector from a 512-bit vector.
959defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
960          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
961defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
962          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
963
// 64-bit element types, 128-bit subvector from a 256-bit vector.
964defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
965          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
966defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
967          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
968
969// Codegen patterns with the alternative types: extract VEC128 from VEC256
// (i16, i8 and f16 elements).
970defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
971          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
972defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
973          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
974defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
975          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
976
977// Codegen patterns with the alternative types: extract VEC128 from VEC512.
978defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
979                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
980defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
981                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
982defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
983                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
984// Codegen patterns with the alternative types: extract VEC256 from VEC512.
985defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
986                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
987defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
988                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
989defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
990                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
991
992
993// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
994// smaller extract to enable EVEX->VEX.
// NoVLX variant. Each pattern matches extract_subvector at the element index
// that corresponds to bit 128 (2 for 64-bit, 4 for 32-bit, 8 for 16-bit and
// 16 for 8-bit elements), takes the sub_ymm subregister of the 512-bit source
// and extracts 128-bit lane 1 of it with VEXTRACTI128rr (integer types) or
// VEXTRACTF128rr (floating-point types).
// NOTE(review): VEXTRACT[FI]128rr are defined outside this chunk; presumably
// they are the VEX-encoded forms - confirm.
995let Predicates = [NoVLX] in {
996def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
997          (v2i64 (VEXTRACTI128rr
998                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
999                  (iPTR 1)))>;
1000def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1001          (v2f64 (VEXTRACTF128rr
1002                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1003                  (iPTR 1)))>;
1004def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1005          (v4i32 (VEXTRACTI128rr
1006                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1007                  (iPTR 1)))>;
1008def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1009          (v4f32 (VEXTRACTF128rr
1010                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1011                  (iPTR 1)))>;
1012def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1013          (v8i16 (VEXTRACTI128rr
1014                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1015                  (iPTR 1)))>;
1016def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1017          (v8f16 (VEXTRACTF128rr
1018                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1019                  (iPTR 1)))>;
1020def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1021          (v16i8 (VEXTRACTI128rr
1022                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1023                  (iPTR 1)))>;
1024}
1025
1026// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
1027// smaller extract to enable EVEX->VEX.
// HasVLX variant. Same source patterns as the NoVLX group, but the result
// uses the 256-bit-source instructions defined in this file:
// VEXTRACTI32x4Z256rr for integer types and VEXTRACTF32x4Z256rr for
// floating-point types, applied to the sub_ymm subregister with lane index 1.
1028let Predicates = [HasVLX] in {
1029def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
1030          (v2i64 (VEXTRACTI32x4Z256rr
1031                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
1032                  (iPTR 1)))>;
1033def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
1034          (v2f64 (VEXTRACTF32x4Z256rr
1035                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1036                  (iPTR 1)))>;
1037def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1038          (v4i32 (VEXTRACTI32x4Z256rr
1039                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1040                  (iPTR 1)))>;
1041def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1042          (v4f32 (VEXTRACTF32x4Z256rr
1043                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1044                  (iPTR 1)))>;
1045def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1046          (v8i16 (VEXTRACTI32x4Z256rr
1047                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1048                  (iPTR 1)))>;
1049def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
1050          (v8f16 (VEXTRACTF32x4Z256rr
1051                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
1052                  (iPTR 1)))>;
1053def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1054          (v16i8 (VEXTRACTI32x4Z256rr
1055                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1056                  (iPTR 1)))>;
1057}
1058
1059
1060// Additional patterns for handling a bitcast between the vselect and the
1061// extract_subvector.
// The extract is matched on the From/To types, its result is bitconverted to
// Cast.VT, and the vselect_mask (mask class Cast.KRCWM) picks between that
// and either a pass-through register or all-zeros.
//   InstrStr                 - base instruction name; its "rrk" and "rrkz"
//                              records are selected.
//   From / To                - types the extract itself is performed on.
//   Cast                     - type of the vselect_mask result and
//                              pass-through.
//   vextract_extract         - fragment to match; the matched node is bound
//                              to $ext.
//   EXTRACT_get_vextract_imm - transform applied to $ext to produce the
//                              immediate operand.
//   p                        - predicates guarding both patterns.
1062multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1063                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
1064                                  PatFrag vextract_extract,
1065                                  SDNodeXForm EXTRACT_get_vextract_imm,
1066                                  list<Predicate> p> {
1067let Predicates = p in {
  // Merge-masking. The pass-through operand has the vselect's type (Cast.VT),
  // so it is bound with Cast.RC here, the same class the output pattern uses
  // for $src0.
1068  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1069                                   (bitconvert
1070                                    (To.VT (vextract_extract:$ext
1071                                            (From.VT From.RC:$src), (iPTR imm)))),
1072                                   Cast.RC:$src0)),
1073            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1074                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1075                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1076
  // Zero-masking: the vselect's false operand is Cast.ImmAllZerosV.
1077  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1078                                   (bitconvert
1079                                    (To.VT (vextract_extract:$ext
1080                                            (From.VT From.RC:$src), (iPTR imm)))),
1081                                   Cast.ImmAllZerosV)),
1082            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1083                      Cast.KRCWM:$mask, From.RC:$src,
1084                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1085}
1086}
1087
1088defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1089                              v4f32x_info, vextract128_extract,
1090                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1091defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1092                              v2f64x_info, vextract128_extract,
1093                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1094
1095defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1096                              v4i32x_info, vextract128_extract,
1097                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1098defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1099                              v4i32x_info, vextract128_extract,
1100                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1101defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1102                              v4i32x_info, vextract128_extract,
1103                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1104defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1105                              v2i64x_info, vextract128_extract,
1106                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1107defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1108                              v2i64x_info, vextract128_extract,
1109                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1110defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1111                              v2i64x_info, vextract128_extract,
1112                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1113
1114defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1115                              v4f32x_info, vextract128_extract,
1116                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1117defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1118                              v2f64x_info, vextract128_extract,
1119                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1120
// 128-bit extracts from 512-bit integer sources (v8i64, v32i16, v64i8,
// v16i32). The VEXTRACTI32x4Z forms only need HasAVX512; the VEXTRACTI64x2Z
// forms are gated on HasDQI.
1121defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1122                              v4i32x_info, vextract128_extract,
1123                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1124defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1125                              v4i32x_info, vextract128_extract,
1126                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1127defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1128                              v4i32x_info, vextract128_extract,
1129                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1130defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1131                              v2i64x_info, vextract128_extract,
1132                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1133defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1134                              v2i64x_info, vextract128_extract,
1135                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1136defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1137                              v2i64x_info, vextract128_extract,
1138                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1139
// 256-bit extracts from 512-bit floating-point sources: v8f64 through
// VEXTRACTF32x8Z (HasDQI) and v16f32 through VEXTRACTF64x4Z (HasAVX512).
1140defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1141                              v8f32x_info, vextract256_extract,
1142                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1143defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1144                              v4f64x_info, vextract256_extract,
1145                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1146
// 256-bit extracts from 512-bit integer sources (v8i64, v32i16, v64i8,
// v16i32). The VEXTRACTI32x8Z forms are gated on HasDQI; the VEXTRACTI64x4Z
// forms only need HasAVX512.
1147defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1148                              v8i32x_info, vextract256_extract,
1149                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1150defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1151                              v8i32x_info, vextract256_extract,
1152                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1153defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1154                              v8i32x_info, vextract256_extract,
1155                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1156defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1157                              v4i64x_info, vextract256_extract,
1158                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1159defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1160                              v4i64x_info, vextract256_extract,
1161                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1162defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1163                              v4i64x_info, vextract256_extract,
1164                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1165
1166// vextractps - extract 32 bits from XMM
// Register-destination form (opcode 0x17, MRMDestReg). The selection pattern
// views the v4f32 source as v4i32 and extracts the element indexed by the
// 8-bit immediate $src2 into a GR32orGR64 destination.
1167def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1168      (ins VR128X:$src1, u8imm:$src2),
1169      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1170      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1171      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1172
// Memory-destination form of vextractps (opcode 0x17, MRMDestMem). It has no
// register outputs: the pattern stores the extracted element (v4f32 source
// viewed as v4i32, indexed by the immediate $src2) to the f32mem operand $dst.
// Tagged EVEX_CD8<32, CD8VT1> and scheduled as WriteVecExtractSt.
1173def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1174      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1175      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1176      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1177                          addr:$dst)]>,
1178      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1179
1180//===---------------------------------------------------------------------===//
1181// AVX-512 BROADCAST
1182//---
1183// broadcast with a scalar argument.
// Emits three anonymous patterns that select an X86VBroadcast of a scalar
// held in SrcInfo.FRC to the register-source broadcast instruction named
// Name # DestInfo.ZSuffix # {rr, rrk, rrkz}:
//   - unmasked                      -> rr
//   - merge-masked with $src0       -> rrk
//   - zero-masked (ImmAllZerosV)    -> rrkz
// In every case the scalar is first moved into the vector register class
// SrcInfo.RC with COPY_TO_REGCLASS.
1184multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1185                                   X86VectorVTInfo SrcInfo> {
1186  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1187            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1188             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1189  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1190                                       (X86VBroadcast SrcInfo.FRC:$src),
1191                                       DestInfo.RC:$src0)),
1192            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1193             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1194             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1195  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1196                                       (X86VBroadcast SrcInfo.FRC:$src),
1197                                       DestInfo.ImmAllZerosV)),
1198            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1199             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1200}
1201
1202// Split version to allow mask and broadcast node to be different types. This
1203// helps support the 32x2 broadcasts.
//
// Defines six instruction records: rr, rrkz, rrk (register source) and
// rm, rmkz, rmk (scalar memory source).
//   MaskInfo - type/register class of the result and of the write-mask.
//   DestInfo - type in which the broadcast node is formed; it is bitconverted
//              to MaskInfo.VT in every pattern. Also supplies ExeDomain.
//   SrcInfo  - source register class, scalar memory operand, broadcast-load
//              fragment, and element size for EVEX_CD8.
//   IsConvertibleToThreeAddress - applied to the rmk record only.
//   UnmaskedOp / UnmaskedBcastOp - operators used only by the unmasked rr and
//              rm patterns. The masked records always use X86VBroadcast and
//              SrcInfo.BroadcastLdFrag regardless of these parameters.
1204multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1205                                     SchedWrite SchedRR, SchedWrite SchedRM,
1206                                     X86VectorVTInfo MaskInfo,
1207                                     X86VectorVTInfo DestInfo,
1208                                     X86VectorVTInfo SrcInfo,
1209                                     bit IsConvertibleToThreeAddress,
1210                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1211                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Unmasked register-source form; pattern is built from UnmaskedOp.
1212  let hasSideEffects = 0 in
1213  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1214                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1215                    [(set MaskInfo.RC:$dst,
1216                      (MaskInfo.VT
1217                       (bitconvert
1218                        (DestInfo.VT
1219                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1220                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Zero-masking register-source form: unselected lanes come from
  // MaskInfo.ImmAllZerosV.
1221  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1222                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1223                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1224                       "${dst} {${mask}} {z}, $src}"),
1225                       [(set MaskInfo.RC:$dst,
1226                         (vselect_mask MaskInfo.KRCWM:$mask,
1227                          (MaskInfo.VT
1228                           (bitconvert
1229                            (DestInfo.VT
1230                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1231                          MaskInfo.ImmAllZerosV))],
1232                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Merge-masking register-source form: unselected lanes come from $src0,
  // which is tied to $dst.
1233  let Constraints = "$src0 = $dst" in
1234  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1235                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1236                          SrcInfo.RC:$src),
1237                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1238                     "${dst} {${mask}}, $src}"),
1239                     [(set MaskInfo.RC:$dst,
1240                       (vselect_mask MaskInfo.KRCWM:$mask,
1241                        (MaskInfo.VT
1242                         (bitconvert
1243                          (DestInfo.VT
1244                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1245                        MaskInfo.RC:$src0))],
1246                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1247
  // Unmasked memory-source form; pattern is built from UnmaskedBcastOp.
1248  let hasSideEffects = 0, mayLoad = 1 in
1249  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1250                    (ins SrcInfo.ScalarMemOp:$src),
1251                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1252                    [(set MaskInfo.RC:$dst,
1253                      (MaskInfo.VT
1254                       (bitconvert
1255                        (DestInfo.VT
1256                         (UnmaskedBcastOp addr:$src)))))],
1257                    DestInfo.ExeDomain>, T8PD, EVEX,
1258                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1259
  // Zero-masking memory-source form.
1260  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1261                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1262                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1263                       "${dst} {${mask}} {z}, $src}"),
1264                       [(set MaskInfo.RC:$dst,
1265                         (vselect_mask MaskInfo.KRCWM:$mask,
1266                          (MaskInfo.VT
1267                           (bitconvert
1268                            (DestInfo.VT
1269                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1270                          MaskInfo.ImmAllZerosV))],
1271                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1272                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1273
  // Merge-masking memory-source form; the only record that receives the
  // IsConvertibleToThreeAddress flag.
1274  let Constraints = "$src0 = $dst",
1275      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1276  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1277                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1278                          SrcInfo.ScalarMemOp:$src),
1279                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1280                     "${dst} {${mask}}, $src}"),
1281                     [(set MaskInfo.RC:$dst,
1282                       (vselect_mask MaskInfo.KRCWM:$mask,
1283                        (MaskInfo.VT
1284                         (bitconvert
1285                          (DestInfo.VT
1286                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1287                        MaskInfo.RC:$src0))],
1288                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1289                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1290}
1291
1292// Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo passed as both the
// MaskInfo and DestInfo arguments; UnmaskedOp and UnmaskedBcastOp are left at
// their defaults.
1293multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1294                               SchedWrite SchedRR, SchedWrite SchedRM,
1295                               X86VectorVTInfo DestInfo,
1296                               X86VectorVTInfo SrcInfo,
1297                               bit IsConvertibleToThreeAddress> :
1298  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1299                            DestInfo, DestInfo, SrcInfo,
1300                            IsConvertibleToThreeAddress>;
1301
// Scalar-double style FP broadcast: defines the Z (512-bit, HasAVX512) and
// Z256 (256-bit, HasVLX) variants only; there is no Z128 variant here.
// Each variant combines the instruction records from avx512_broadcast_rm
// (source is always _.info128, IsConvertibleToThreeAddress = 1) with the
// scalar-register patterns from avx512_broadcast_scalar.
1302multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1303                                  AVX512VLVectorVTInfo _> {
1304  let Predicates = [HasAVX512] in {
1305    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1306                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1307              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1308              EVEX_V512;
1309  }
1310
1311  let Predicates = [HasVLX] in {
1312    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1313                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1314                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1315                 EVEX_V256;
1316  }
1317}
1318
// Scalar-single style FP broadcast: same structure as avx512_fp_broadcast_sd
// but additionally defines a Z128 variant under HasVLX.
// NOTE(review): the Z128 variant uses the WriteFShuffle256 /
// WriteFShuffle256Ld scheduling classes, the same as the wider variants;
// confirm this is intentional.
1319multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1320                                  AVX512VLVectorVTInfo _> {
1321  let Predicates = [HasAVX512] in {
1322    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1323                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1324              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1325              EVEX_V512;
1326  }
1327
1328  let Predicates = [HasVLX] in {
1329    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1330                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1331                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1332                 EVEX_V256;
1333    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1334                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1335                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1336                 EVEX_V128;
1337  }
1338}
// FP scalar broadcasts: vbroadcastss (opcode 0x18, f32 element info) and
// vbroadcastsd (opcode 0x19, f64 element info, tagged VEX_W1X).
1339defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1340                                       avx512vl_f32_info>;
1341defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1342                                       avx512vl_f64_info>, VEX_W1X;
1343
// Integer broadcast from a general-purpose register. Defines the "rr" family
// through AVX512_maskable with mnemonic "vpbroadcast" # _.Suffix, a single
// SrcRC:$src input, and the pattern (_.VT (OpNode SrcRC:$src)). All three
// commutability flags are 0 and vselect is passed as the final argument.
// NOTE(review): AVX512_maskable is defined outside this chunk; the meaning of
// its trailing select-operator argument should be confirmed there.
1344multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1345                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1346                                    RegisterClass SrcRC> {
1347  // Fold with a mask even if it has multiple uses since it is cheap.
1348  let ExeDomain = _.ExeDomain in
1349  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1350                          (ins SrcRC:$src),
1351                          "vpbroadcast"#_.Suffix, "$src", "$src",
1352                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1353                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1354                          T8PD, EVEX, Sched<[SchedRR]>;
1355}
1356
// Integer broadcast from a GPR narrower than 32 bits (byte/word variants).
// The instruction records are declared with a GR32:$src operand and empty
// pattern lists; selection is done by the three separate Pat records below,
// which place SrcRC:$src into sub-register Subreg of an IMPLICIT_DEF i32 via
// INSERT_SUBREG before feeding it to Name#rr / Name#rrk / Name#rrkz.
//   Name   - full instruction name prefix used in the !cast lookups.
//   SrcRC  - register class of the scalar as it appears in the DAG pattern.
//   Subreg - sub-register index of SrcRC within the 32-bit register.
1357multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1358                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1359                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1360  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1361  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1362                         (outs _.RC:$dst), (ins GR32:$src),
1363                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1364                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1365                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1366                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1367
  // Unmasked broadcast.
1368  def : Pat <(_.VT (OpNode SrcRC:$src)),
1369             (!cast<Instruction>(Name#rr)
1370              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1371
1372  // Fold with a mask even if it has multiple uses since it is cheap.
1373  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1374             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1375              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1376
  // Zero-masked broadcast.
1377  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1378             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1379              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1380}
1381
// Vector-length wrapper around avx512_int_broadcastbw_reg: Z (512-bit) is
// gated on prd alone; Z256 and Z128 are gated on prd plus HasVLX. The Name
// passed down is extended with the matching "Z", "Z256" or "Z128" suffix.
// Z and Z256 use WriteShuffle256; Z128 uses WriteShuffle.
1382multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1383                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1384                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1385  let Predicates = [prd] in
1386    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1387              OpNode, SrcRC, Subreg>, EVEX_V512;
1388  let Predicates = [prd, HasVLX] in {
1389    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1390              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1391    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1392              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1393  }
1394}
1395
// Vector-length wrapper around avx512_int_broadcast_reg: Z (512-bit) is gated
// on prd alone; Z256 and Z128 are gated on prd plus HasVLX. Z and Z256 use
// WriteShuffle256; Z128 uses WriteShuffle.
1396multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1397                                       SDPatternOperator OpNode,
1398                                       RegisterClass SrcRC, Predicate prd> {
1399  let Predicates = [prd] in
1400    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1401                                      SrcRC>, EVEX_V512;
1402  let Predicates = [prd, HasVLX] in {
1403    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1404                                         SrcRC>, EVEX_V256;
1405    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1406                                         SrcRC>, EVEX_V128;
1407  }
1408}
1409
// Integer broadcasts from general-purpose registers.
//   VPBROADCASTBr (0x7A) / VPBROADCASTWr (0x7B): GR8 / GR16 sources inserted
//     through sub_8bit / sub_16bit, gated on HasBWI.
//   VPBROADCASTDr / VPBROADCASTQr (both 0x7C): GR32 / GR64 sources, gated on
//     HasAVX512; the Q form is additionally tagged VEX_W.
1410defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1411                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1412defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1413                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1414                       HasBWI>;
1415defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1416                                                 X86VBroadcast, GR32, HasAVX512>;
1417defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1418                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1419
// Vector-length wrapper for integer broadcasts whose source is a vector
// register or scalar memory. Instantiates avx512_broadcast_rm for Z (prd),
// and for Z256 / Z128 (prd plus HasVLX), always with _.info128 as the source
// info and forwarding IsConvertibleToThreeAddress unchanged.
// Z and Z256 use WriteShuffle256 / WriteShuffle256Ld; Z128 uses
// WriteShuffle / WriteShuffleXLd.
1420multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1421                                      AVX512VLVectorVTInfo _, Predicate prd,
1422                                      bit IsConvertibleToThreeAddress> {
1423  let Predicates = [prd] in {
1424    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1425                                   WriteShuffle256Ld, _.info512, _.info128,
1426                                   IsConvertibleToThreeAddress>,
1427                                  EVEX_V512;
1428  }
1429  let Predicates = [prd, HasVLX] in {
1430    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1431                                    WriteShuffle256Ld, _.info256, _.info128,
1432                                    IsConvertibleToThreeAddress>,
1433                                 EVEX_V256;
1434    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1435                                    WriteShuffleXLd, _.info128, _.info128,
1436                                    IsConvertibleToThreeAddress>,
1437                                 EVEX_V128;
1438  }
1439}
1440
// Integer broadcasts from a vector register or scalar memory.
//   vpbroadcastb (0x78) / vpbroadcastw (0x79): HasBWI,
//     IsConvertibleToThreeAddress = 0.
//   vpbroadcastd (0x58) / vpbroadcastq (0x59): HasAVX512,
//     IsConvertibleToThreeAddress = 1; the Q form is tagged VEX_W1X.
1441defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1442                                           avx512vl_i8_info, HasBWI, 0>;
1443defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1444                                           avx512vl_i16_info, HasBWI, 0>;
1445defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1446                                           avx512vl_i32_info, HasAVX512, 1>;
1447defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1448                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1449
// Subvector broadcast from memory. Defines the "rm" family through
// AVX512_maskable with a _Src.MemOp:$src input and the pattern
// (_Dst.VT (OpNode addr:$src)); scheduled as SchedWriteShuffle.YMM.Folded.
1450multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1451                                      SDPatternOperator OpNode,
1452                                      X86VectorVTInfo _Dst,
1453                                      X86VectorVTInfo _Src> {
1454  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1455                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1456                           (_Dst.VT (OpNode addr:$src))>,
1457                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1458                           AVX5128IBase, EVEX;
1459}
1460
1461// This should be used for the AVX512DQ broadcast instructions. It disables
1462// the unmasked patterns so that we only use the DQ instructions when masking
1463// is requested.
// Implemented with AVX512_maskable_split: null_frag is passed in the first
// pattern slot and (_Dst.VT (OpNode addr:$src)) in the second. Because the
// unmasked record then has no pattern, hasSideEffects and mayLoad are set
// explicitly.
1464multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1465                                         SDPatternOperator OpNode,
1466                                         X86VectorVTInfo _Dst,
1467                                         X86VectorVTInfo _Src> {
1468  let hasSideEffects = 0, mayLoad = 1 in
1469  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1470                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1471                           (null_frag),
1472                           (_Dst.VT (OpNode addr:$src))>,
1473                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1474                           AVX5128IBase, EVEX;
1475}
// 512-bit f16 broadcasts (HasBWI) are selected to the 16-bit integer
// broadcast VPBROADCASTWZ: from memory (rm), from a v8f16 vector register
// (rr), and from a scalar f16 in FR16X after COPY_TO_REGCLASS to VR128X (rr).
1476let Predicates = [HasBWI] in {
1477  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1478            (VPBROADCASTWZrm addr:$src)>;
1479
1480  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1481            (VPBROADCASTWZrr VR128X:$src)>;
1482  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1483            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1484}
// 128-bit and 256-bit f16 broadcasts (HasVLX and HasBWI) are selected to
// VPBROADCASTWZ128 / VPBROADCASTWZ256: from memory (rm), from a v8f16 vector
// register (rr), and from a scalar f16 in FR16X after COPY_TO_REGCLASS to
// VR128X (rr).
1485let Predicates = [HasVLX, HasBWI] in {
1486  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1487            (VPBROADCASTWZ128rm addr:$src)>;
1488  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1489            (VPBROADCASTWZ256rm addr:$src)>;
1490
1491  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1492            (VPBROADCASTWZ128rr VR128X:$src)>;
1493  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1494            (VPBROADCASTWZ256rr VR128X:$src)>;
1495
1496  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1497            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1498  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1499            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1500}
1501
1502//===----------------------------------------------------------------------===//
1503// AVX-512 BROADCAST SUBVECTORS
1504//
1505
// 512-bit subvector broadcasts from memory.
//   32x4 forms (0x5a integer / 0x1a FP): 128-bit load broadcast into
//     v16i32 / v16f32, EVEX_CD8<32, CD8VT4>.
//   64x4 forms (0x5b integer / 0x1b FP): 256-bit load broadcast into
//     v8i64 / v8f64, VEX_W, EVEX_CD8<64, CD8VT4>.
1506defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1507                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1508                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1509defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1510                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1511                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1512defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1513                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
1514                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1515defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1516                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
1517                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1518
// Extra selection patterns for the 512-bit subvector broadcasts (HasAVX512).
// Unmasked: every 512-bit FP result type maps a 256-bit broadcast load to
// VBROADCASTF64X4rm and a 128-bit one to VBROADCASTF32X4rm; every 512-bit
// integer result type maps to VBROADCASTI64X4rm / VBROADCASTI32X4rm.
1519let Predicates = [HasAVX512] in {
1520def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1521          (VBROADCASTF64X4rm addr:$src)>;
1522def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1523          (VBROADCASTF64X4rm addr:$src)>;
1524def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1525          (VBROADCASTF64X4rm addr:$src)>;
1526def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1527          (VBROADCASTI64X4rm addr:$src)>;
1528def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1529          (VBROADCASTI64X4rm addr:$src)>;
1530def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1531          (VBROADCASTI64X4rm addr:$src)>;
1532def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1533          (VBROADCASTI64X4rm addr:$src)>;
1534
1535def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1536          (VBROADCASTF32X4rm addr:$src)>;
1537def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1538          (VBROADCASTF32X4rm addr:$src)>;
1539def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1540          (VBROADCASTF32X4rm addr:$src)>;
1541def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1542          (VBROADCASTI32X4rm addr:$src)>;
1543def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1544          (VBROADCASTI32X4rm addr:$src)>;
1545def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1546          (VBROADCASTI32X4rm addr:$src)>;
1547def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1548          (VBROADCASTI32X4rm addr:$src)>;
1549
1550// Patterns for selects of bitcasted operations.
// The broadcast node is typed with the other element width (64-bit elements
// for the 32x4 forms, 32-bit elements for the 64x4 forms) and bitcast to the
// instruction's own type before the masked select.
1551def : Pat<(vselect_mask VK16WM:$mask,
1552                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1553                        (v16f32 immAllZerosV)),
1554          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1555def : Pat<(vselect_mask VK16WM:$mask,
1556                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1557                        VR512:$src0),
1558          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1559def : Pat<(vselect_mask VK16WM:$mask,
1560                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1561                        (v16i32 immAllZerosV)),
1562          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1563def : Pat<(vselect_mask VK16WM:$mask,
1564                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1565                        VR512:$src0),
1566          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1567
1568def : Pat<(vselect_mask VK8WM:$mask,
1569                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1570                        (v8f64 immAllZerosV)),
1571          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1572def : Pat<(vselect_mask VK8WM:$mask,
1573                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1574                        VR512:$src0),
1575          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1576def : Pat<(vselect_mask VK8WM:$mask,
1577                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1578                        (v8i64 immAllZerosV)),
1579          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1580def : Pat<(vselect_mask VK8WM:$mask,
1581                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1582                        VR512:$src0),
1583          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1584}
1585
// 256-bit subvector broadcasts (HasVLX): the 32x4 instruction definitions,
// unmasked patterns mapping every 256-bit FP / integer result type to
// VBROADCASTF32X4Z256rm / VBROADCASTI32X4Z256rm, and masked patterns for a
// 64-bit-element broadcast node bitcast to the 32-bit-element type.
1586let Predicates = [HasVLX] in {
1587defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1588                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1589                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1590defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1591                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1592                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1593
1594def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1595          (VBROADCASTF32X4Z256rm addr:$src)>;
1596def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1597          (VBROADCASTF32X4Z256rm addr:$src)>;
1598def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1599          (VBROADCASTF32X4Z256rm addr:$src)>;
1600def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1601          (VBROADCASTI32X4Z256rm addr:$src)>;
1602def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1603          (VBROADCASTI32X4Z256rm addr:$src)>;
1604def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1605          (VBROADCASTI32X4Z256rm addr:$src)>;
1606def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1607          (VBROADCASTI32X4Z256rm addr:$src)>;
1608
1609// Patterns for selects of bitcasted operations.
1610def : Pat<(vselect_mask VK8WM:$mask,
1611                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1612                        (v8f32 immAllZerosV)),
1613          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1614def : Pat<(vselect_mask VK8WM:$mask,
1615                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1616                        VR256X:$src0),
1617          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1618def : Pat<(vselect_mask VK8WM:$mask,
1619                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1620                        (v8i32 immAllZerosV)),
1621          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1622def : Pat<(vselect_mask VK8WM:$mask,
1623                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1624                        VR256X:$src0),
1625          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1626}
1627
// 256-bit 64x2 subvector broadcasts (HasVLX and HasDQI), defined through
// avx512_subvec_broadcast_rm_dq. Note that although the record names end in
// "Z128", the destination info is v4i64x / v4f64x and the encoding modifier
// is EVEX_V256. The masked patterns below cover a 32-bit-element broadcast
// node bitcast to the 64-bit-element type.
1628let Predicates = [HasVLX, HasDQI] in {
1629defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1630                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1631                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1632defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1633                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1634                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1635
1636// Patterns for selects of bitcasted operations.
1637def : Pat<(vselect_mask VK4WM:$mask,
1638                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1639                        (v4f64 immAllZerosV)),
1640          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1641def : Pat<(vselect_mask VK4WM:$mask,
1642                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1643                        VR256X:$src0),
1644          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1645def : Pat<(vselect_mask VK4WM:$mask,
1646                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1647                        (v4i64 immAllZerosV)),
1648          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1649def : Pat<(vselect_mask VK4WM:$mask,
1650                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1651                        VR256X:$src0),
1652          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1653}
1654
// 512-bit DQ subvector broadcasts (HasDQI), defined through
// avx512_subvec_broadcast_rm_dq: the 64x2 forms (128-bit load, VEX_W,
// EVEX_CD8<64, CD8VT2>) and the 32x8 forms (256-bit load,
// EVEX_CD8<32, CD8VT8>). The masked patterns below cover a broadcast node
// typed with the other element width and bitcast to the instruction's type.
1655let Predicates = [HasDQI] in {
1656defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1657                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
1658                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1659defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1660                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1661                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1662defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1663                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
1664                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1665defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1666                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1667                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1668
1669// Patterns for selects of bitcasted operations.
1670def : Pat<(vselect_mask VK16WM:$mask,
1671                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1672                        (v16f32 immAllZerosV)),
1673          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1674def : Pat<(vselect_mask VK16WM:$mask,
1675                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1676                        VR512:$src0),
1677          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1678def : Pat<(vselect_mask VK16WM:$mask,
1679                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1680                        (v16i32 immAllZerosV)),
1681          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1682def : Pat<(vselect_mask VK16WM:$mask,
1683                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1684                        VR512:$src0),
1685          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1686
1687def : Pat<(vselect_mask VK8WM:$mask,
1688                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1689                        (v8f64 immAllZerosV)),
1690          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1691def : Pat<(vselect_mask VK8WM:$mask,
1692                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1693                        VR512:$src0),
1694          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1695def : Pat<(vselect_mask VK8WM:$mask,
1696                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1697                        (v8i64 immAllZerosV)),
1698          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1699def : Pat<(vselect_mask VK8WM:$mask,
1700                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1701                        VR512:$src0),
1702          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1703}
1704
// 512-bit (Z) and 256-bit (Z256) forms shared by the 32x2 broadcasts. Both
// forms instantiate avx512_broadcast_rm_split with the destination info of the
// matching width, the source info of the matching width, and _Src.info128.
// No selection fragments are supplied (null_frag, null_frag).
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  // 512-bit form: needs DQI only.
  let Predicates = [HasDQI] in {
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr,
                                       WriteShuffle256, WriteShuffle256Ld,
                                       _Dst.info512, _Src.info512, _Src.info128,
                                       0, null_frag, null_frag>,
             EVEX_V512;
  }

  // 256-bit form: needs DQI and VLX.
  let Predicates = [HasDQI, HasVLX] in {
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                          WriteShuffle256, WriteShuffle256Ld,
                                          _Dst.info256, _Src.info256, _Src.info128,
                                          0, null_frag, null_frag>,
                EVEX_V256;
  }
}
1719
// Extends avx512_common_broadcast_32x2 (Z and Z256) with a 128-bit (Z128)
// form. The 128-bit form uses the XMM shuffle scheduling classes.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src>
    : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  // 128-bit form: needs DQI and VLX.
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                          WriteShuffle, WriteShuffleXLd,
                                          _Dst.info128, _Src.info128, _Src.info128,
                                          0, null_frag, null_frag>,
                EVEX_V128;
  }
}
1731
// The integer form goes through avx512_common_broadcast_i32x2 and therefore
// gets Z, Z256 and Z128 variants; the FP form uses the common multiclass only
// and gets Z and Z256. The destination info has 32-bit elements and the source
// info has 64-bit elements.
1732defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1733                                          avx512vl_i32_info, avx512vl_i64_info>;
1734defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1735                                          avx512vl_f32_info, avx512vl_f64_info>;
1736
1737//===----------------------------------------------------------------------===//
1738// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1739//---
// Register form (rr) of a mask-to-vector broadcast: reads a mask register of
// class KRC and produces the X86VBroadcastm of it in a vector register of
// class _.RC.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.RC:$dst), (ins KRC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
           EVEX, Sched<[WriteShuffle]>;
}
1747
// Instantiates avx512_mask_broadcastm for all three vector widths: the
// 512-bit form requires CDI, the 256- and 128-bit forms require CDI and VLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in {
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>,
             EVEX_V512;
  }

  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>,
                EVEX_V128;
  }
}
1757
// Mask broadcasts: the W2D form reads a VK16 mask into 32-bit-element vectors;
// the B2Q form reads a VK8 mask into 64-bit-element vectors and sets VEX_W.
1758defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1759                                               avx512vl_i32_info, VK16>;
1760defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1761                                               avx512vl_i64_info, VK8>, VEX_W;
1762
1763//===----------------------------------------------------------------------===//
1764// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of VPERMI2*.
//   _     - vector info for the data operands ($src2, $src3) and the result.
//   IdxVT - vector info for $src1.
// $src1 is tied to $dst ("$src1 = $dst") and is used in the pattern as the
// IdxVT-typed middle operand of X86VPermt2, so the instruction overwrites
// that operand with the result. Both _ and IdxVT are handed to
// AVX512_maskable_3src_cast.
// NOTE(review): AVX512_maskable_3src_cast is defined outside this chunk;
// presumably it casts the tied operand to _ for masking - confirm.
1765multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1766                         X86FoldableSchedWrite sched,
1767                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1768let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1769    hasSideEffects = 0 in {
1770  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1771          (ins _.RC:$src2, _.RC:$src3),
1772          OpcodeStr, "$src3, $src2", "$src2, $src3",
1773          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1774          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1775
  // Same operation with the last data operand loaded from memory via _.LdFrag.
1776  let mayLoad = 1 in
1777  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1778            (ins _.RC:$src2, _.MemOp:$src3),
1779            OpcodeStr, "$src3, $src2", "$src2, $src3",
1780            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1781                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1782            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1783  }
1784}
1785
// Broadcast-memory (rmb) form of VPERMI2*: the last data operand is a scalar
// memory operand read through _.BroadcastLdFrag, the asm operand carries the
// _.BroadcastStr suffix, and EVEX_B is set. As in the rr/rm forms, $src1 is
// tied to $dst and is the IdxVT-typed middle operand of X86VPermt2.
1786multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1787                            X86FoldableSchedWrite sched,
1788                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1789  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1790      hasSideEffects = 0, mayLoad = 1 in
1791  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1792              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1793              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1794              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1795              (_.VT (X86VPermt2 _.RC:$src2,
1796               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1797              AVX5128IBase, EVEX_4V, EVEX_B,
1798              Sched<[sched.Folded, sched.ReadAfterFold]>;
1799}
1800
// Instantiates the VPERMI2 rr/rm forms (avx512_perm_i) and the broadcast form
// (avx512_perm_i_mb) at 512, 128 and 256 bits. VTInfo supplies the data vector
// infos and ShuffleMask the index vector infos.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  // 512-bit forms; no predicate is added at this level.
  defm NAME : avx512_perm_i<opc, OpcodeStr, sched,
                            VTInfo.info512, ShuffleMask.info512>,
              avx512_perm_i_mb<opc, OpcodeStr, sched,
                               VTInfo.info512, ShuffleMask.info512>,
              EVEX_V512;

  // 128- and 256-bit forms require VLX.
  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info128, ShuffleMask.info128>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched,
                                     VTInfo.info128, ShuffleMask.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info256, ShuffleMask.info256>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched,
                                     VTInfo.info256, ShuffleMask.info256>,
                    EVEX_V256;
  }
}
1820
// Byte/word flavour of avx512_perm_i_sizes: only the rr/rm forms are
// instantiated (no broadcast form), and every width is guarded by the
// caller-supplied predicate Prd; the 128/256-bit widths also require VLX.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_i<opc, OpcodeStr, sched,
                              VTInfo.info512, Idx.info512>,
                EVEX_V512;
  }

  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info128, Idx.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info256, Idx.info256>,
                    EVEX_V256;
  }
}
1836
// VPERMI2 instantiations. D/Q/PS/PD use avx512_perm_i_sizes (with broadcast
// forms); W/B use the _bw variant gated on HasBWI / HasVBMI. The FP forms
// take an integer info of the same element width as their index type.
// The 64-bit-element forms and the W form add VEX_W.
1837defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1838                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1839defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1840                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1841defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1842                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1843                  VEX_W, EVEX_CD8<16, CD8VF>;
1844defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1845                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1846                  EVEX_CD8<8, CD8VF>;
1847defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1848                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1849defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1850                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1851
1852// Extra patterns to deal with extra bitcasts due to passthru and index being
1853// different types on the fp versions.
//   InstrStr - base instruction name; "rrk", "rmk" or "rmbk" is appended.
//   _        - data/result vector info.
//   IdxVT    - vector info the index operand is bitconverted to.
//   CastVT   - vector info $src1 is typed as before the bitconverts.
// In every pattern the same $src1 register feeds both the index operand
// (bitconvert to IdxVT) and the select's false operand (bitconvert to _.VT),
// which is what allows the merge-masked ("k") instruction to be used.
1854multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1855                                  X86VectorVTInfo IdxVT,
1856                                  X86VectorVTInfo CastVT> {
  // Register source.
1857  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1858                                (X86VPermt2 (_.VT _.RC:$src2),
1859                                            (IdxVT.VT (bitconvert
1860                                                       (CastVT.VT _.RC:$src1))),
1861                                            _.RC:$src3),
1862                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1863            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1864                                                _.RC:$src2, _.RC:$src3)>;
  // Full-vector load source.
1865  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1866                                (X86VPermt2 _.RC:$src2,
1867                                            (IdxVT.VT (bitconvert
1868                                                       (CastVT.VT _.RC:$src1))),
1869                                            (_.LdFrag addr:$src3)),
1870                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1871            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1872                                                _.RC:$src2, addr:$src3)>;
  // Broadcast load source.
1873  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1874                                 (X86VPermt2 _.RC:$src2,
1875                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1876                                             (_.BroadcastLdFrag addr:$src3)),
1877                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1878            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1879                                                 _.RC:$src2, addr:$src3)>;
1880}
1881
1882// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS forms with a vXi64-typed $src1 are covered here, one
// instantiation per vector width.
1883defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1884defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1885defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1886
1887// VPERMT2
// Register (rr) and full-vector memory (rm) forms. Unlike VPERMI2 above, the
// operand tied to $dst ($src1) is the first data operand of X86VPermt2, typed
// _.RC, and the IdxVT-typed operand is the separate input $src2.
1888multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1889                         X86FoldableSchedWrite sched,
1890                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1891let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1892  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1893          (ins IdxVT.RC:$src2, _.RC:$src3),
1894          OpcodeStr, "$src3, $src2", "$src2, $src3",
1895          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1896          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1897
  // Last data operand loaded from memory via _.LdFrag.
1898  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1899            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1900            OpcodeStr, "$src3, $src2", "$src2, $src3",
1901            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1902                   (_.LdFrag addr:$src3))), 1>,
1903            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1904  }
1905}
// Broadcast-memory (rmb) form of VPERMT2*: the last data operand is a scalar
// memory operand read through _.BroadcastLdFrag, the asm operand carries the
// _.BroadcastStr suffix, and EVEX_B is set. $src1 (first data operand) is
// tied to $dst.
1906multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1907                            X86FoldableSchedWrite sched,
1908                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1909  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1910  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1911              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1912              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1913              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1914              (_.VT (X86VPermt2 _.RC:$src1,
1915               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1916              AVX5128IBase, EVEX_4V, EVEX_B,
1917              Sched<[sched.Folded, sched.ReadAfterFold]>;
1918}
1919
// Instantiates the VPERMT2 rr/rm forms (avx512_perm_t) and the broadcast form
// (avx512_perm_t_mb) at 512, 128 and 256 bits. VTInfo supplies the data vector
// infos and ShuffleMask the index vector infos.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  // 512-bit forms; no predicate is added at this level.
  defm NAME : avx512_perm_t<opc, OpcodeStr, sched,
                            VTInfo.info512, ShuffleMask.info512>,
              avx512_perm_t_mb<opc, OpcodeStr, sched,
                               VTInfo.info512, ShuffleMask.info512>,
              EVEX_V512;

  // 128- and 256-bit forms require VLX.
  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info128, ShuffleMask.info128>,
                    avx512_perm_t_mb<opc, OpcodeStr, sched,
                                     VTInfo.info128, ShuffleMask.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info256, ShuffleMask.info256>,
                    avx512_perm_t_mb<opc, OpcodeStr, sched,
                                     VTInfo.info256, ShuffleMask.info256>,
                    EVEX_V256;
  }
}
1939
// Byte/word flavour of avx512_perm_t_sizes: only the rr/rm forms are
// instantiated (no broadcast form), and every width is guarded by the
// caller-supplied predicate Prd; the 128/256-bit widths also require VLX.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_t<opc, OpcodeStr, sched,
                              VTInfo.info512, Idx.info512>,
                EVEX_V512;
  }

  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info128, Idx.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info256, Idx.info256>,
                    EVEX_V256;
  }
}
1954
// VPERMT2 instantiations. D/Q/PS/PD use avx512_perm_t_sizes (with broadcast
// forms); W/B use the _bw variant gated on HasBWI / HasVBMI. The FP forms
// take an integer info of the same element width as their index type.
// The 64-bit-element forms and the W form add VEX_W.
1955defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1956                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1957defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1958                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1959defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1960                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1961                  VEX_W, EVEX_CD8<16, CD8VF>;
1962defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1963                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1964                  EVEX_CD8<8, CD8VF>;
1965defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1966                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1967defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1968                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1969
1970//===----------------------------------------------------------------------===//
1971// AVX-512 - BLEND using mask
1972//
1973
// Register and full-vector memory forms of the mask blend instructions:
// unmasked (rr/rm), merge-masked (rrk/rmk) and zero-masked (rrkz/rmkz).
// All six defs have an empty pattern list, so they only describe operands,
// assembly syntax, encoding and scheduling. The zero-masked forms are marked
// NotMemoryFoldable.
// NOTE(review): the multiclass name reads like a scheduler-class name rather
// than an instruction multiclass; renaming it would require updating every
// user in this file at the same time.
1974multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1975                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1976  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1977  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1978             (ins _.RC:$src1, _.RC:$src2),
1979             !strconcat(OpcodeStr,
1980             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1981             EVEX_4V, Sched<[sched]>;
1982  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1983             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1984             !strconcat(OpcodeStr,
1985             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1986             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1987  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1988             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1989             !strconcat(OpcodeStr,
1990             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1991             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  // Memory forms: $src2 is a full-vector memory operand.
1992  let mayLoad = 1 in {
1993  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1994             (ins _.RC:$src1, _.MemOp:$src2),
1995             !strconcat(OpcodeStr,
1996             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1997             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1998             Sched<[sched.Folded, sched.ReadAfterFold]>;
1999  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2000             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2001             !strconcat(OpcodeStr,
2002             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2003             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2004             Sched<[sched.Folded, sched.ReadAfterFold]>;
2005  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2006             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2007             !strconcat(OpcodeStr,
2008             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2009             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2010             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2011  }
2012  }
2013}
// Broadcast-memory forms of the mask blend instructions: merge-masked (rmbk),
// zero-masked (rmbkz) and unmasked (rmb). $src2 is a scalar memory operand
// printed with the _.BroadcastStr suffix, and EVEX_B is set. All patterns are
// empty; the zero-masked form is marked NotMemoryFoldable.
2014multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
2015                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2016  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
2017  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2018      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2019       !strconcat(OpcodeStr,
2020            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2021            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2022      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2023      Sched<[sched.Folded, sched.ReadAfterFold]>;
2024
2025  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2026      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2027       !strconcat(OpcodeStr,
2028            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2029            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2030      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2031      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2032
2033  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2034      (ins _.RC:$src1, _.ScalarMemOp:$src2),
2035       !strconcat(OpcodeStr,
2036            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2037            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2038      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2039      Sched<[sched.Folded, sched.ReadAfterFold]>;
2040  }
2041}
2042
// Dword/qword-element mask blends at all three vector widths. Each width gets
// the register/full-vector forms (WriteFVarBlendask) plus the broadcast forms
// (WriteFVarBlendask_rmb), with the scheduling class picked per width from
// `sched`. No predicate is added for the 512-bit width at this level; the
// 256/128-bit widths require VLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM,
                                      VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM,
                                      VTInfo.info128>,
                EVEX_V128;
  }
}
2058
// Byte/word-element mask blends at all three vector widths. Only the
// register/full-vector forms are instantiated (no broadcast forms). The
// 512-bit width requires BWI; the 256/128-bit widths require BWI and VLX.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in {
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}
2072
// Mask blend instantiations. FP forms use SchedWriteFVarBlend, integer forms
// SchedWriteVarBlend. PS/PD/D/Q go through blendmask_dq (with broadcast
// forms); B/W go through blendmask_bw. PD, Q and W add VEX_W.
2073defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2074                              avx512vl_f32_info>;
2075defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2076                              avx512vl_f64_info>, VEX_W;
2077defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2078                              avx512vl_i32_info>;
2079defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2080                              avx512vl_i64_info>, VEX_W;
2081defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2082                              avx512vl_i8_info>;
2083defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2084                              avx512vl_i16_info>, VEX_W;
2085
2086//===----------------------------------------------------------------------===//
2087// Compare Instructions
2088//===----------------------------------------------------------------------===//
2089
2090// avx512_cmp_scalar - AVX512 CMPSS, CMPSD and CMPSH
2091
// Scalar FP compares writing a mask register (_.KRC), opcode 0xC2 with an
// 8-bit condition-code immediate.
//   OpNode / OpNodeSAE       - compare nodes used for the unmasked patterns.
//   OpNode_su / OpNodeSAE_su - fragments used for the second pattern handed
//                              to AVX512_maskable_cmp.
// Defines:
//   rr_Int / rm_Int - vector-register operands (_.RC); rm_Int loads through
//                     _.ScalarIntMemFrags. Both carry SIMD_EXC.
//   rrb_Int         - {sae} form: sets EVEX_B, lists MXCSR in Uses and does
//                     not carry SIMD_EXC.
//   rr / rm         - isCodeGenOnly forms on scalar FP registers (_.FRC);
//                     rr is commutable, rm loads through _.ScalarLdFrag.
2092multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2093                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2094                             X86FoldableSchedWrite sched> {
2095  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2096                      (outs _.KRC:$dst),
2097                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2098                      "vcmp"#_.Suffix,
2099                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2100                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2101                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2102                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2103  let mayLoad = 1 in
2104  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2105                    (outs _.KRC:$dst),
2106                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2107                    "vcmp"#_.Suffix,
2108                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2109                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2110                        timm:$cc),
2111                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2112                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2113                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2114
2115  let Uses = [MXCSR] in
2116  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2117                     (outs _.KRC:$dst),
2118                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2119                     "vcmp"#_.Suffix,
2120                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2121                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2122                                timm:$cc),
2123                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2124                                   timm:$cc)>,
2125                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2126
2127  let isCodeGenOnly = 1 in {
2128    let isCommutable = 1 in
2129    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2130                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2131                !strconcat("vcmp", _.Suffix,
2132                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2133                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2134                                          _.FRC:$src2,
2135                                          timm:$cc))]>,
2136                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2137    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2138              (outs _.KRC:$dst),
2139              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2140              !strconcat("vcmp", _.Suffix,
2141                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2142              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2143                                        (_.ScalarLdFrag addr:$src2),
2144                                        timm:$cc))]>,
2145              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2146              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2147  }
2148}
2149
// X86cmpms restricted to nodes with exactly one use (N->hasOneUse()).
2150def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2151                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
2152  return N->hasOneUse();
2153}]>;
// X86cmpmsSAE restricted to nodes with exactly one use (N->hasOneUse()).
2154def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2155                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2156  return N->hasOneUse();
2157}]>;
2158
// Scalar compare instantiations. The f32 and f64 forms require HasAVX512 and
// use the XS / XD immediate bases (f64 adds VEX_W); the f16 form requires
// HasFP16 and adds TA on top of the XS base.
// NOTE(review): the f16 form is placed in the SSEPackedSingle execution
// domain, same as f32 - confirm this is intended.
2159let Predicates = [HasAVX512] in {
2160  let ExeDomain = SSEPackedSingle in
2161  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2162                                   X86cmpms_su, X86cmpmsSAE_su,
2163                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2164  let ExeDomain = SSEPackedDouble in
2165  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2166                                   X86cmpms_su, X86cmpmsSAE_su,
2167                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2168}
2169let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2170  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2171                                   X86cmpms_su, X86cmpmsSAE_su,
2172                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2173
// Packed integer compares that write a mask register (_.KRC): unmasked
// register/memory forms (rr, rm) and masked forms (rrk, rmk) that take a
// write mask in _.KRCWM. All four defs have an empty pattern list.
// IsCommutable is applied to the register forms only; every def sets
// hasSideEffects = 0 and the memory forms set mayLoad = 1.
2174multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2175                              X86FoldableSchedWrite sched,
2176                              X86VectorVTInfo _, bit IsCommutable> {
2177  let isCommutable = IsCommutable, hasSideEffects = 0 in
2178  def rr : AVX512BI<opc, MRMSrcReg,
2179             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2180             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2181             []>, EVEX_4V, Sched<[sched]>;
2182  let mayLoad = 1, hasSideEffects = 0 in
2183  def rm : AVX512BI<opc, MRMSrcMem,
2184             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2185             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2186             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2187  let isCommutable = IsCommutable, hasSideEffects = 0 in
2188  def rrk : AVX512BI<opc, MRMSrcReg,
2189              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2190              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2191                          "$dst {${mask}}, $src1, $src2}"),
2192              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2193  let mayLoad = 1, hasSideEffects = 0 in
2194  def rmk : AVX512BI<opc, MRMSrcMem,
2195              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2196              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2197                          "$dst {${mask}}, $src1, $src2}"),
2198              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2199}
2200
// Extends avx512_icmp_packed with broadcast-memory forms: unmasked (rmb) and
// masked (rmbk). $src2 is a scalar memory operand printed with the
// _.BroadcastStr suffix and EVEX_B is set. Patterns are empty.
2201multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2202                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2203                                  bit IsCommutable> :
2204           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2205  let mayLoad = 1, hasSideEffects = 0 in {
2206  def rmb : AVX512BI<opc, MRMSrcMem,
2207              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2208              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2209                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2210              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2211  def rmbk : AVX512BI<opc, MRMSrcMem,
2212               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2213                                       _.ScalarMemOp:$src2),
2214               !strconcat(OpcodeStr,
2215                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2216                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2217               []>, EVEX_4V, EVEX_K, EVEX_B,
2218               Sched<[sched.Folded, sched.ReadAfterFold]>;
2219  }
2220}
2221
// Instantiates avx512_icmp_packed at 512, 256 and 128 bits, choosing the
// scheduling class per width from `sched`. The 512-bit width is guarded by
// `prd`; the narrower widths by `prd` and VLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                                VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
2237
// Instantiates avx512_icmp_packed_rmb (register, memory and broadcast forms)
// at 512, 256 and 128 bits, choosing the scheduling class per width from
// `sched`. The 512-bit width is guarded by `prd`; the narrower widths by
// `prd` and VLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                    VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
2253
2254// This node treats ISD::SETCC as commutable (SDNPCommutative) to help match
2255// loads in both operands for PCMPEQ.
2256def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// setcc with the SETGT condition code; uses the plain (non-commutable) setcc.
2257def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2258                         (setcc node:$src1, node:$src2, SETGT)>;
2259
2260// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2261// increase the pattern complexity the way an immediate would.
// Byte and word compares are predicated on HasBWI and use
// avx512_icmp_packed_vl; dword and qword compares are predicated on HasAVX512
// and use avx512_icmp_packed_rmb_vl. The EQ forms pass IsCommutable = 1, the
// GT forms leave it at its default of 0.
2262let AddedComplexity = 2 in {
2263// FIXME: Is there a better scheduler class for VPCMP?
2264defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2265                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2266                EVEX_CD8<8, CD8VF>, VEX_WIG;
2267
2268defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2269                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2270                EVEX_CD8<16, CD8VF>, VEX_WIG;
2271
2272defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2273                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2274                EVEX_CD8<32, CD8VF>;
2275
2276defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2277                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2278                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2279
2280defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2281                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2282                EVEX_CD8<8, CD8VF>, VEX_WIG;
2283
2284defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2285                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2286                EVEX_CD8<16, CD8VF>, VEX_WIG;
2287
2288defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2289                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2290                EVEX_CD8<32, CD8VF>;
2291
2292defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2293                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2294                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2295}
2296
// Transforms a setcc node into the i8 immediate that X86::getVPCMPImmForCond
// returns for the node's condition code (operand 2).
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  const uint8_t VPCMPImm = X86::getVPCMPImmForCond(CondNode->get());
  return getI8Imm(VPCMPImm, SDLoc(N));
}]>;
2302
// Like X86pcmpm_imm, but for use when the two compare operands are swapped:
// the immediate computed from the condition code is additionally passed
// through X86::getSwappedVPCMPImm.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  const uint8_t DirectImm = X86::getVPCMPImmForCond(CondNode->get());
  const uint8_t SwappedImm = X86::getSwappedVPCMPImm(DirectImm);
  return getI8Imm(SwappedImm, SDLoc(N));
}]>;
2310
// Defines the immediate-controlled integer compare "vpcmp<Suffix>" for one
// vector type `_`; every form writes a mask register (_.KRC):
//   rri  - register / register
//   rmi  - register / full-vector load
//   rrik - register / register, result ANDed with the write-mask $mask
//   rmik - register / full-vector load, result ANDed with the write-mask $mask
// Frag is used in the unmasked patterns and Frag_su in the masked ones.
// Name is the record-name prefix used by the trailing patterns to look up the
// rmi/rmik instructions.
2311multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2312                          PatFrag Frag_su,
2313                          X86FoldableSchedWrite sched,
2314                          X86VectorVTInfo _, string Name> {
2315  let isCommutable = 1 in
2316  def rri : AVX512AIi8<opc, MRMSrcReg,
2317             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2318             !strconcat("vpcmp", Suffix,
2319                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2320             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2321                                                (_.VT _.RC:$src2),
2322                                                cond)))]>,
2323             EVEX_4V, Sched<[sched]>;
2324  def rmi : AVX512AIi8<opc, MRMSrcMem,
2325             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2326             !strconcat("vpcmp", Suffix,
2327                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2328             [(set _.KRC:$dst, (_.KVT
2329                                (Frag:$cc
2330                                 (_.VT _.RC:$src1),
2331                                 (_.VT (_.LdFrag addr:$src2)),
2332                                 cond)))]>,
2333             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2334  let isCommutable = 1 in
2335  def rrik : AVX512AIi8<opc, MRMSrcReg,
2336              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2337                                      u8imm:$cc),
2338              !strconcat("vpcmp", Suffix,
2339                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2340                         "$dst {${mask}}, $src1, $src2, $cc}"),
2341              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2342                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2343                                                         (_.VT _.RC:$src2),
2344                                                         cond))))]>,
2345              EVEX_4V, EVEX_K, Sched<[sched]>;
2346  def rmik : AVX512AIi8<opc, MRMSrcMem,
2347              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2348                                    u8imm:$cc),
2349              !strconcat("vpcmp", Suffix,
2350                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2351                         "$dst {${mask}}, $src1, $src2, $cc}"),
2352              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2353                                     (_.KVT
2354                                      (Frag_su:$cc
2355                                       (_.VT _.RC:$src1),
2356                                       (_.VT (_.LdFrag addr:$src2)),
2357                                       cond))))]>,
2358              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2359
  // A load in the first compare operand is selected to the memory form with
  // the operands swapped; X86pcmpm_imm_commute supplies the swapped immediate.
2360  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2361                             (_.VT _.RC:$src1), cond)),
2362            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2363             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2364
  // Masked counterpart of the pattern above.
2365  def : Pat<(and _.KRCWM:$mask,
2366                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2367                                     (_.VT _.RC:$src1), cond))),
2368            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2369             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2370             (X86pcmpm_imm_commute $cc))>;
2371}
2372
// Extends avx512_icmp_cc with the broadcast memory forms rmib and rmibk
// (both marked EVEX_B). Their second source is a scalar memory operand matched
// through _.BroadcastLdFrag. Patterns for a broadcast load in the first
// compare operand are added as well.
2373multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2374                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2375                              X86VectorVTInfo _, string Name> :
2376           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2377  def rmib : AVX512AIi8<opc, MRMSrcMem,
2378             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2379                                     u8imm:$cc),
2380             !strconcat("vpcmp", Suffix,
2381                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2382                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2383             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2384                                       (_.VT _.RC:$src1),
2385                                       (_.BroadcastLdFrag addr:$src2),
2386                                       cond)))]>,
2387             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2388  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2389              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2390                                       _.ScalarMemOp:$src2, u8imm:$cc),
2391              !strconcat("vpcmp", Suffix,
2392                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2393                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2394              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2395                                     (_.KVT (Frag_su:$cc
2396                                             (_.VT _.RC:$src1),
2397                                             (_.BroadcastLdFrag addr:$src2),
2398                                             cond))))]>,
2399              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2400
  // A broadcast load in the first compare operand is selected to rmib with the
  // operands swapped and the immediate rewritten by X86pcmpm_imm_commute.
2401  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2402                    (_.VT _.RC:$src1), cond)),
2403            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2404             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2405
  // Masked counterpart of the pattern above.
2406  def : Pat<(and _.KRCWM:$mask,
2407                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2408                                     (_.VT _.RC:$src1), cond))),
2409            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2410             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2411             (X86pcmpm_imm_commute $cc))>;
2412}
2413
// Instantiates avx512_icmp_cc for each vector length in VTInfo, passing NAME
// as the record-name prefix. Z is guarded by `prd`; Z256 and Z128 also
// require HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched.ZMM,
                            VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched.YMM,
                               VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched.XMM,
                               VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
2428
// Instantiates avx512_icmp_cc_rmb for each vector length in VTInfo, passing
// NAME as the record-name prefix. Z is guarded by `prd`; Z256 and Z128 also
// require HasVLX.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, sched.ZMM,
                                VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, sched.YMM,
                                   VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, sched.XMM,
                                   VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
2443
// Matches a setcc whose condition code is not an unsigned integer compare.
// The matched node is converted to an immediate with X86pcmpm_imm.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  if (ISD::isUnsignedIntSetCC(CondNode->get()))
    return false;
  return true;
}], X86pcmpm_imm>;
2449
// Single-use variant of X86pcmpm: matches only when the setcc node has exactly
// one use and its condition code is not an unsigned integer compare.
def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  if (!N->hasOneUse())
    return false;
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  return !ISD::isUnsignedIntSetCC(CondNode->get());
}], X86pcmpm_imm>;
2455
// Matches a setcc whose condition code is an unsigned integer compare.
// The matched node is converted to an immediate with X86pcmpm_imm.
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  if (ISD::isUnsignedIntSetCC(CondNode->get()))
    return true;
  return false;
}], X86pcmpm_imm>;
2461
// Single-use variant of X86pcmpum: matches only when the setcc node has
// exactly one use and its condition code is an unsigned integer compare.
def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  if (!N->hasOneUse())
    return false;
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  return ISD::isUnsignedIntSetCC(CondNode->get());
}], X86pcmpm_imm>;
2467
// VPCMPB/W/D/Q use X86pcmpm / X86pcmpm_su (condition codes that are not
// unsigned integer compares); VPCMPUB/UW/UD/UQ use X86pcmpum / X86pcmpum_su
// (unsigned integer compares). Byte and word variants are predicated on HasBWI
// and use avx512_icmp_cc_vl; dword and qword variants are predicated on
// HasAVX512 and use avx512_icmp_cc_rmb_vl, which adds the broadcast forms.
2468// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2469defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2470                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2471                                EVEX_CD8<8, CD8VF>;
2472defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2473                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2474                                 EVEX_CD8<8, CD8VF>;
2475
2476defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2477                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2478                                VEX_W, EVEX_CD8<16, CD8VF>;
2479defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2480                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2481                                 VEX_W, EVEX_CD8<16, CD8VF>;
2482
2483defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2484                                    SchedWriteVecALU, avx512vl_i32_info,
2485                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2486defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2487                                     SchedWriteVecALU, avx512vl_i32_info,
2488                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2489
2490defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2491                                    SchedWriteVecALU, avx512vl_i64_info,
2492                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2493defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2494                                     SchedWriteVecALU, avx512vl_i64_info,
2495                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2496
// Single-use variant of X86cmpm: matches only when the compare node has
// exactly one use.
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  const bool HasSingleUse = N->hasOneUse();
  return HasSingleUse;
}]>;
2501
// Rewrites a VCMP target immediate for swapped operands: the value is masked
// to its low five bits and passed through X86::getSwappedVCMPImm.
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  const uint64_t MaskedImm = N->getZExtValue() & 0x1f;
  const uint8_t SwappedImm = X86::getSwappedVCMPImm(MaskedImm);
  return getI8Imm(SwappedImm, SDLoc(N));
}]>;
2506
// Defines the packed floating-point compare "vcmp<Suffix>" (opcode 0xC2) for
// one vector type `_`, plus its selection patterns:
//   rri  - register / register
//   rmi  - register / full-vector load
//   rmbi - register / broadcast load (EVEX_B)
// Each form passes an X86any_cmpm pattern and an X86cmpm_su pattern to
// AVX512_maskable_cmp. The patterns below also reference rrik/rmik/rmbik,
// presumably the masked forms produced by AVX512_maskable_cmp (defined outside
// this section - confirm).
// Name is the record-name prefix the patterns use to look up the instructions.
2507multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2508                              string Name> {
// The instruction definitions read MXCSR and may raise FP exceptions.
2509let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // The trailing 1 is the last AVX512_maskable_cmp argument; presumably the
  // commutable flag - confirm against its definition.
2510  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2511                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2512                   "vcmp"#_.Suffix,
2513                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2514                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2515                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2516                   1>, Sched<[sched]>;
2517
2518  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2519                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2520                "vcmp"#_.Suffix,
2521                "$cc, $src2, $src1", "$src1, $src2, $cc",
2522                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2523                             timm:$cc),
2524                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2525                            timm:$cc)>,
2526                Sched<[sched.Folded, sched.ReadAfterFold]>;
2527
2528  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2529                (outs _.KRC:$dst),
2530                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2531                "vcmp"#_.Suffix,
2532                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2533                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2534                (X86any_cmpm (_.VT _.RC:$src1),
2535                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2536                             timm:$cc),
2537                (X86cmpm_su (_.VT _.RC:$src1),
2538                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2539                            timm:$cc)>,
2540                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2541  }
2542
2543  // Patterns for selecting with loads in other operand.
  // The operands are swapped in the selected instruction and the immediate is
  // rewritten with X86cmpm_imm_commute.
2544  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2545                         timm:$cc),
2546            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2547                                                      (X86cmpm_imm_commute timm:$cc))>;
2548
2549  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2550                                            (_.VT _.RC:$src1),
2551                                            timm:$cc)),
2552            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2553                                                       _.RC:$src1, addr:$src2,
2554                                                       (X86cmpm_imm_commute timm:$cc))>;
2555
2556  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2557                         (_.VT _.RC:$src1), timm:$cc),
2558            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2559                                                       (X86cmpm_imm_commute timm:$cc))>;
2560
2561  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2562                                            (_.VT _.RC:$src1),
2563                                            timm:$cc)),
2564            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2565                                                        _.RC:$src1, addr:$src2,
2566                                                        (X86cmpm_imm_commute timm:$cc))>;
2567
2568  // Patterns for mask intrinsics.
  // X86cmpmm carries the mask as its fourth operand: an all-ones mask selects
  // the unmasked instruction, any other mask selects the masked (k) form.
2569  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2570                      (_.KVT immAllOnesV)),
2571            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2572
2573  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2574            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2575                                                       _.RC:$src2, timm:$cc)>;
2576
2577  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2578                      (_.KVT immAllOnesV)),
2579            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2580
2581  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2582                      _.KRCWM:$mask),
2583            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2584                                                       addr:$src2, timm:$cc)>;
2585
2586  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2587                      (_.KVT immAllOnesV)),
2588            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2589
2590  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2591                      _.KRCWM:$mask),
2592            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2593                                                        addr:$src2, timm:$cc)>;
2594
2595  // Patterns for mask intrinsics with loads in other operand.
2596  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2597                      (_.KVT immAllOnesV)),
2598            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2599                                                      (X86cmpm_imm_commute timm:$cc))>;
2600
2601  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2602                      _.KRCWM:$mask),
2603            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2604                                                       _.RC:$src1, addr:$src2,
2605                                                       (X86cmpm_imm_commute timm:$cc))>;
2606
2607  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2608                      (_.KVT immAllOnesV)),
2609            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2610                                                       (X86cmpm_imm_commute timm:$cc))>;
2611
2612  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2613                      _.KRCWM:$mask),
2614            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2615                                                        _.RC:$src1, addr:$src2,
2616                                                        (X86cmpm_imm_commute  timm:$cc))>;
2617}
2618
// Defines rrib, the register/register "{sae}" form of "vcmp<Suffix>" (opcode
// 0xC2, EVEX_B) for vector type `_`. Both patterns select X86cmpmmSAE: the
// unmasked one matches an all-ones mask operand, the masked one takes
// _.KRCWM:$mask. Only Uses = [MXCSR] is set here; unlike avx512_vcmp_common,
// mayRaiseFPException is not.
2619multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2620  // Comparison-code form (VCMP[EQ/LT/LE/...]).
2621  let Uses = [MXCSR] in
2622  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2623                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2624                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2625                     "vcmp"#_.Suffix,
2626                     "$cc, {sae}, $src2, $src1",
2627                     "$src1, $src2, {sae}, $cc",
2628                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2629                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2630                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2631                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2632                     EVEX_B, Sched<[sched]>;
2633}
2634
// Instantiates the packed FP compare for every vector length in `_`.
// The 512-bit form (Z) combines avx512_vcmp_common with avx512_vcmp_sae and is
// guarded by Pred. The 128-bit and 256-bit forms use avx512_vcmp_common only
// and additionally require HasVLX. Pred defaults to HasAVX512.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
2647
// Packed FP compares for f64, f32 and f16 vectors. VCMPPD and VCMPPS use
// avx512_vcmp's default predicate (HasAVX512); VCMPPH passes HasFP16.
2648defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2649                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2650defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2651                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2652defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2653                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2654
2655// Patterns to select scalar fp compares with a load as the first operand.
// The selected rm instruction takes the register as $src1 and the address as
// $src2, so the immediate is rewritten with X86cmpm_imm_commute.
2656let Predicates = [HasAVX512] in {
2657  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2658            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2659
2660  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2661            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2662}
2663
// Same pattern for f16, available only with HasFP16.
2664let Predicates = [HasFP16] in {
2665  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2666            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2667}
2668
2669// ----------------------------------------------------------------
2670// FPClass
2671
// Single-use variant of X86Vfpclasss: matches only when the node has exactly
// one use.
def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  const bool HasSingleUse = N->hasOneUse();
  return HasSingleUse;
}]>;
2676
// Single-use variant of X86Vfpclass: matches only when the node has exactly
// one use.
def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  const bool HasSingleUse = N->hasOneUse();
  return HasSingleUse;
}]>;
2681
2682// Handle scalar fpclass instructions: mask = op(reg_scalar, imm)
2683//                                     mask = op(mem_scalar, imm)
// Defines four forms, all guarded by `prd` and writing a mask register:
//   rr  - register source
//   rrk - register source, result ANDed with the write-mask $mask
//   rm  - scalar memory source
//   rmk - scalar memory source, result ANDed with the write-mask $mask
// The masked patterns use the single-use fragment X86Vfpclasss_su.
2684multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2685                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2686                                 Predicate prd> {
2687  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2688      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2689                      (ins _.RC:$src1, i32u8imm:$src2),
2690                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2691                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2692                              (i32 timm:$src2)))]>,
2693                      Sched<[sched]>;
2694      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2695                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2696                      OpcodeStr#_.Suffix#
2697                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2698                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2699                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2700                                      (i32 timm:$src2))))]>,
2701                      EVEX_K, Sched<[sched]>;
2702    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2703                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2704                    OpcodeStr#_.Suffix#
2705                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2706                    [(set _.KRC:$dst,
2707                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2708                                        (i32 timm:$src2)))]>,
2709                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2710    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2711                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2712                    OpcodeStr#_.Suffix#
2713                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2714                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2715                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2716                            (i32 timm:$src2))))]>,
2717                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2718  }
2719}
2720
2721// Handle vector fpclass instructions: mask = fpclass(reg_vec, imm)
2722//                                     mask = fpclass(mem_vec, imm)
2723//                                     mask = fpclass(broadcast(eltVt), imm)
// Defines the unmasked forms rr, rm, rmb and their write-masked counterparts
// rrk, rmk, rmbk (result ANDed with $mask, using X86Vfpclass_su). `mem` is the
// size suffix: it is appended in braces to the rm/rmk mnemonics and without
// braces to the aliases defined at the end.
2724multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2725                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2726                                 string mem>{
2727  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2728  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2729                      (ins _.RC:$src1, i32u8imm:$src2),
2730                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2731                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2732                                       (i32 timm:$src2)))]>,
2733                      Sched<[sched]>;
2734  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2735                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2736                      OpcodeStr#_.Suffix#
2737                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2738                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2739                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2740                                       (i32 timm:$src2))))]>,
2741                      EVEX_K, Sched<[sched]>;
2742  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2743                    (ins _.MemOp:$src1, i32u8imm:$src2),
2744                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2745                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2746                    [(set _.KRC:$dst,(X86Vfpclass
2747                                     (_.VT (_.LdFrag addr:$src1)),
2748                                     (i32 timm:$src2)))]>,
2749                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2750  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2751                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2752                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2753                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2754                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2755                                  (_.VT (_.LdFrag addr:$src1)),
2756                                  (i32 timm:$src2))))]>,
2757                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2758  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2759                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2760                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2761                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2762                                                  #_.BroadcastStr#", $src2}",
2763                    [(set _.KRC:$dst,(X86Vfpclass
2764                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2765                                     (i32 timm:$src2)))]>,
2766                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2767  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2768                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2769                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2770                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2771                                                   _.BroadcastStr#", $src2}",
2772                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2773                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2774                                     (i32 timm:$src2))))]>,
2775                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2776  }
2777
2778  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2779  // the memory form.
2780  def : InstAlias<OpcodeStr#_.Suffix#mem#
2781                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2782                  (!cast<Instruction>(NAME#"rr")
2783                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2784  def : InstAlias<OpcodeStr#_.Suffix#mem#
2785                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2786                  (!cast<Instruction>(NAME#"rrk")
2787                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2788  def : InstAlias<OpcodeStr#_.Suffix#mem#
2789                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2790                  _.BroadcastStr#", $src2}",
2791                  (!cast<Instruction>(NAME#"rmb")
2792                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2793  def : InstAlias<OpcodeStr#_.Suffix#mem#
2794                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2795                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2796                  (!cast<Instruction>(NAME#"rmbk")
2797                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2798}
2799
// Instantiates avx512_vector_fpclass for each vector length in `_`, passing
// the size suffix "z", "x" or "y" for the 512-, 128- and 256-bit forms.
// Z is guarded by `prd`; Z128 and Z256 additionally require HasVLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, _.info512, "z">,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, _.info128,
                                      "x">,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, _.info256,
                                      "y">,
                EVEX_V256;
  }
}
2814
// Instantiates every fpclass variant for one mnemonic: packed forms (PH, PS,
// PD) use opcVec through avx512_vector_fpclass_all, and scalar forms (SHZ,
// SSZ, SDZ) use opcScalar through avx512_scalar_fpclass with sched.Scl.
// The f16 variants are predicated on HasFP16, the f32/f64 variants on HasDQI.
2815multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2816                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2817  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2818                                      sched, HasFP16>,
2819                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2820  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2821                                   sched.Scl, f16x_info, HasFP16>,
2822                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2823  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2824                                      sched, HasDQI>,
2825                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2826  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2827                                      sched, HasDQI>,
2828                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
2829  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2830                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2831                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2832  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2833                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2834                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
2835}
2836
2837defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2838
2839//-----------------------------------------------------------------
2840// Mask register copy, including
2841// - copy between mask registers
2842// - load/store mask registers
2843// - copy from GPR to mask register and vice versa
2844//
// Defines the three non-GPR move forms for one mask width:
//   kk - mask register to mask register (no selection pattern)
//   km - load from x86memop into KRC, selected for (vvt (load addr:$src))
//   mk - store of KRC:$src to x86memop, selected for (store KRC:$src, addr:$dst)
// opc_kk / opc_km / opc_mk are the opcodes of the respective forms.
2845multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2846                         string OpcodeStr, RegisterClass KRC,
2847                         ValueType vvt, X86MemOperand x86memop> {
  // This brace-less `let` covers only the next definition (kk). It sets
  // SchedRW = [WriteMove], and kk also mixes in Sched<[WriteMove]>.
2848  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2849  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2850             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2851             Sched<[WriteMove]>;
2852  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2853             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2854             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2855             Sched<[WriteLoad]>;
2856  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2857             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2858             [(store KRC:$src, addr:$dst)]>,
2859             Sched<[WriteStore]>;
2860}
2861
// Defines the moves between a general-purpose register class (GRC) and a mask
// register class (KRC):
//   kr - GRC -> KRC, opcode opc_kr
//   rk - KRC -> GRC, opcode opc_rk
// Neither form has a selection pattern; both are marked hasSideEffects = 0
// and scheduled as WriteMove.
2862multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2863                             string OpcodeStr,
2864                             RegisterClass KRC, RegisterClass GRC> {
2865  let hasSideEffects = 0 in {
2866    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2867               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2868               Sched<[WriteMove]>;
2869    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2870               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2871               Sched<[WriteMove]>;
2872  }
2873}
2874
// KMOVB: 8-bit mask moves (VK8 / i8mem); the GPR forms use GR32. Needs DQI.
2875let Predicates = [HasDQI] in
2876  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2877               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2878               VEX, PD;
2879
// KMOVW: 16-bit mask moves (VK16 / i16mem); the GPR forms use GR32.
2880let Predicates = [HasAVX512] in
2881  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2882               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2883               VEX, PS;
2884
// KMOVD / KMOVQ: 32- and 64-bit mask moves, both gated on BWI. Unlike
// KMOVB/KMOVW, the mask/memory forms and the GPR forms are instantiated by
// separate defm's because they carry different prefix/VEX_W mixins
// (KMOVD: PD+VEX_W vs. XD; KMOVQ: PS+VEX_W vs. XD+VEX_W).
2885let Predicates = [HasBWI] in {
2886  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2887               VEX, PD, VEX_W;
2888  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2889               VEX, XD;
2890  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2891               VEX, PS, VEX_W;
2892  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2893               VEX, XD, VEX_W;
2894}
2895
2896// GR from/to mask register
// 16-bit: an i16 in GR16 is first placed in the low 16 bits of an otherwise
// undefined 32-bit register and then copied to VK16. The reverse direction
// copies the mask to GR32 and extracts sub_16bit (or sub_8bit when the i16
// result is truncated to i8).
2897def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2898          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2899def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2900          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2901def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2902          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2903
// 8-bit: same scheme as above using sub_8bit and VK8.
2904def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2905          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2906def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2907          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2908
// Extensions of a v16i1 mask viewed as i16: zext is selected to KMOVWrk
// (wrapped in SUBREG_TO_REG with (i64 0) for the i64 result), while anyext
// only needs a register-class copy (into an undefined i64 for the i64 result).
2909def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2910          (KMOVWrk VK16:$src)>;
2911def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2912          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2913def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2914          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2915def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2916          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2917
// Extensions of a v8i1 mask viewed as i8: the zext forms use KMOVBrk and
// therefore require HasDQI; the anyext forms carry no predicate.
2918def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2919          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2920def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2921          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2922def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2923          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2924def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2925          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2926
// 32- and 64-bit masks have a same-width GPR class, so a bitconvert in either
// direction is a plain register-class copy.
2927def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2928          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2929def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2930          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2931def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2932          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2933def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2934          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2935
2936// Load/store kreg
// (Only load patterns are defined in this group.)
// With DQI, loads of the sub-byte mask types v1i1/v2i1/v4i1 are selected to
// KMOVBkm and the result is copied to the narrower mask class.
2937let Predicates = [HasDQI] in {
2938  def : Pat<(v1i1 (load addr:$src)),
2939            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2940  def : Pat<(v2i1 (load addr:$src)),
2941            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2942  def : Pat<(v4i1 (load addr:$src)),
2943            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2944}
2945
// With plain AVX512: a v8i1 bitconverted from an i8 load goes through the
// MOVZX32rm8 GPR load followed by a copy to VK8 (KMOVBkm is not used here),
// while a v16i1 bitconverted from an i16 load uses KMOVWkm directly.
2946let Predicates = [HasAVX512] in {
2947  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2948            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2949  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2950            (KMOVWkm addr:$src)>;
2951}
2952
// SDNode bound to ISD::EXTRACT_VECTOR_ELT with a restricted type profile:
// one result and two operands, where the result is i8, operand 1 is a vector
// whose element type is i1, and operand 2 has pointer type (the index).
2953def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2954                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2955                                              SDTCVecEltisVT<1, i1>,
2956                                              SDTCisPtrTy<2>]>>;
2957
2958let Predicates = [HasAVX512] in {
  // Copy-based lowerings between a GPR and element 0 of a mask register of
  // class maskRC / type maskVT:
  //   - scalar_to_vector from GR32: register-class copy.
  //   - scalar_to_vector from GR8: insert into an undefined i32, then copy.
  //   - X86kextract of element 0 as i8: copy to GR32, take sub_8bit.
  //   - anyext to i32 of that extract: just the copy to GR32.
2959  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2960    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2961              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2962
2963    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2964              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2965
2966    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2967              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2968
2969    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2970              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2971  }
2972
  // One instantiation per mask width, v1i1 through v64i1.
2973  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2974  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2975  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2976  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2977  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2978  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2979  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2980
  // Inserting a single GR8-sourced bit at index 0 of an all-zeros v16i1:
  // the GPR value is ANDed with 1 before KMOVWkr, so only bit 0 can be set
  // in the resulting mask.
2981  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2982                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2983            (KMOVWkr (AND32ri8
2984                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2985                      (i32 1)))>;
2986}
2987
2988// Mask unary operation
2989// - KNOT
// Defines the register-register form `rr` of a one-operand mask instruction:
// KRC:$src -> KRC:$dst, selected for (OpNode KRC:$src), gated on `prd` and
// scheduled with `sched`.
2990multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2991                            RegisterClass KRC, SDPatternOperator OpNode,
2992                            X86FoldableSchedWrite sched, Predicate prd> {
2993  let Predicates = [prd] in
2994    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2995               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2996               [(set KRC:$dst, (OpNode KRC:$src))]>,
2997               Sched<[sched]>;
2998}
2999
// Instantiates avx512_mask_unop for all four mask widths, appending the
// width letter to the mnemonic:
//   B - VK8,  HasDQI,    PD
//   W - VK16, HasAVX512, PS
//   D - VK32, HasBWI,    PD + VEX_W
//   Q - VK64, HasBWI,    PS + VEX_W
3000multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3001                                SDPatternOperator OpNode,
3002                                X86FoldableSchedWrite sched> {
3003  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3004                            sched, HasDQI>, VEX, PD;
3005  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3006                            sched, HasAVX512>, VEX, PS;
3007  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3008                            sched, HasBWI>, VEX, PD, VEX_W;
3009  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3010                            sched, HasBWI>, VEX, PS, VEX_W;
3011}
3012
3013// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KNOT{B,W,D,Q}: opcode 0x44, selected for `vnot`; reuses the XMM
// vector-logic scheduling class.
3014defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3015
3016// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Without DQI the byte-sized KNOTB form is unavailable (the B variant of KNOT
// requires HasDQI), so `vnot` on a v8i1 mask is done by copying the mask to
// VK16, running KNOTWrr, and copying the result back to VK8. The brace-less
// `let` below applies only to this first pattern.
3017let Predicates = [HasAVX512, NoDQI] in
3018def : Pat<(vnot VK8:$src),
3019          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3020
// Masks narrower than 8 bits have no dedicated KNOT form; they always go
// through KNOTWrr and are copied back to their own register class.
3021def : Pat<(vnot VK4:$src),
3022          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3023def : Pat<(vnot VK2:$src),
3024          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// The result class must match the VK1 source; this pattern previously copied
// the result to VK2, unlike every sibling pattern.
3025def : Pat<(vnot VK1:$src),
3026          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
3027
3028// Mask binary operation
3029// - KAND, KANDN, KOR, KXNOR, KXOR
// Defines the register-register form `rr` of a two-operand mask instruction:
// (KRC:$src1, KRC:$src2) -> KRC:$dst, selected for
// (OpNode KRC:$src1, KRC:$src2). The definition is gated on `prd`, and its
// isCommutable flag is taken from the IsCommutable argument.
3030multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3031                           RegisterClass KRC, SDPatternOperator OpNode,
3032                           X86FoldableSchedWrite sched, Predicate prd,
3033                           bit IsCommutable> {
3034  let Predicates = [prd], isCommutable = IsCommutable in
3035    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3036               !strconcat(OpcodeStr,
3037                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3038               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
3039               Sched<[sched]>;
3040}
3041
// Instantiates avx512_mask_binop for all four mask widths, appending the
// width letter to the mnemonic. Every form is tagged VEX_4V and VEX_L:
//   B - VK8,  HasDQI, PD
//   W - VK16, prdW,   PS          (prdW defaults to HasAVX512)
//   D - VK32, HasBWI, VEX_W + PD
//   Q - VK64, HasBWI, VEX_W + PS
// prdW lets a caller raise the requirement of the 16-bit form.
3042multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3043                                 SDPatternOperator OpNode,
3044                                 X86FoldableSchedWrite sched, bit IsCommutable,
3045                                 Predicate prdW = HasAVX512> {
3046  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3047                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3048  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3049                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3050  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3051                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3052  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3053                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3054}
3055
3056// These nodes use 'vnot' instead of 'not' to support vectors.
// vandn(i0, i1) = and(vnot(i0), i1)  - the FIRST operand is the inverted one.
// vxnor(i0, i1) = vnot(xor(i0, i1))
3057def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3058def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3059
3060// TODO - do we need a X86SchedWriteWidths::KMASK type?
// The trailing 0/1 is IsCommutable: every operation except KANDN is marked
// commutable. KADD additionally passes HasDQI as prdW, so its 16-bit form
// requires DQI instead of the default HasAVX512.
3061defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3062defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3063defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3064defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3065defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3066defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3067
// Selects a two-operand mask operation VOpNode on narrow mask types by
// promoting both operands to VK16, applying `Inst` (callers pass the 16-bit
// "W" instruction), and copying the result back to the original class.
// The VK8 pattern is restricted to NoDQI; the VK1/VK2/VK4 patterns carry no
// predicate of their own.
3068multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3069                            Instruction Inst> {
3070  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3071  // for the DQI set, this type is legal and KxxxB instruction is used
3072  let Predicates = [NoDQI] in
3073  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3074            (COPY_TO_REGCLASS
3075              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3076                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3077
3078  // All types smaller than 8 bits require conversion anyway
3079  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3080        (COPY_TO_REGCLASS (Inst
3081                           (COPY_TO_REGCLASS VK1:$src1, VK16),
3082                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3083  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3084        (COPY_TO_REGCLASS (Inst
3085                           (COPY_TO_REGCLASS VK2:$src1, VK16),
3086                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3087  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3088        (COPY_TO_REGCLASS (Inst
3089                           (COPY_TO_REGCLASS VK4:$src1, VK16),
3090                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3091}
3092
// Narrow-mask promotion for the logical operations (no KADD entry here).
3093defm : avx512_binop_pat<and,   KANDWrr>;
3094defm : avx512_binop_pat<vandn, KANDNWrr>;
3095defm : avx512_binop_pat<or,    KORWrr>;
3096defm : avx512_binop_pat<vxnor, KXNORWrr>;
3097defm : avx512_binop_pat<xor,   KXORWrr>;
3098
3099// Mask unpacking
// Defines a KUNPCK instruction (opcode 0x4b) that takes two Src-width masks
// and produces one Dst-width mask, plus the pattern that selects it for
// concat_vectors. The instruction itself has no inline pattern.
3100multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3101                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3102                             Predicate prd> {
3103  let Predicates = [prd] in {
3104    let hasSideEffects = 0 in
3105    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3106               (ins Src.KRC:$src1, Src.KRC:$src2),
3107               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3108               VEX_4V, VEX_L, Sched<[sched]>;
3109
    // The operands are deliberately swapped relative to concat_vectors: the
    // first concat operand becomes the instruction's second source.
    // NOTE(review): presumably because the instruction places its first
    // source in the upper half of the result - confirm against the ISA
    // reference.
3110    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3111              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3112  }
3113}
3114
// 8+8 -> 16 needs only AVX512; the 16+16 -> 32 and 32+32 -> 64 forms need BWI.
3115defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3116defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3117defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3118
3119// Mask bit testing
// Defines the register-register form `rr` of a mask test instruction. It has
// no register outputs; it is declared with Defs = [EFLAGS] and is selected
// for (set EFLAGS, (OpNode KRC:$src1, KRC:$src2)). Gated on `prd`.
3120multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3121                              SDNode OpNode, X86FoldableSchedWrite sched,
3122                              Predicate prd> {
3123  let Predicates = [prd], Defs = [EFLAGS] in
3124    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3125               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3126               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3127               Sched<[sched]>;
3128}
3129
// Instantiates avx512_mask_testop for all four mask widths (despite the `_w`
// suffix in the name), appending the width letter to the mnemonic:
//   B - VK8,  HasDQI, PD
//   W - VK16, prdW,   PS          (prdW defaults to HasAVX512)
//   Q - VK64, HasBWI, PS + VEX_W
//   D - VK32, HasBWI, PD + VEX_W
3130multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3131                                X86FoldableSchedWrite sched,
3132                                Predicate prdW = HasAVX512> {
3133  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3134                                                                VEX, PD;
3135  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3136                                                                VEX, PS;
3137  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3138                                                                VEX, PS, VEX_W;
3139  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3140                                                                VEX, PD, VEX_W;
3141}
3142
3143// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST keeps the default prdW (HasAVX512) for its 16-bit form; KTEST
// passes HasDQI, so KTESTW requires DQI.
3144defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3145defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3146
3147// Mask shift
// Defines the register-immediate form `ri` of a mask shift: KRC:$src shifted
// by the 8-bit immediate $imm into KRC:$dst, selected for
// (OpNode KRC:$src, (i8 timm:$imm)). The body sets Predicates = [HasAVX512];
// there is no predicate parameter.
3148multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3149                               SDNode OpNode, X86FoldableSchedWrite sched> {
3150  let Predicates = [HasAVX512] in
3151    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3152                 !strconcat(OpcodeStr,
3153                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3154                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3155                 Sched<[sched]>;
3156}
3157
// Instantiates avx512_mask_shiftop for all four mask widths. The W and B
// forms share opc1 and the Q and D forms share opc2; within each pair the two
// forms differ by VEX_W (set for W and Q, clear for B and D).
// NOTE(review): avx512_mask_shiftop already sets Predicates = [HasAVX512] in
// its body; the outer `let Predicates = [HasDQI]` / `[HasBWI]` here are
// expected to take precedence for B and Q/D - confirm against TableGen's
// `let` semantics for defm.
3158multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3159                                 SDNode OpNode, X86FoldableSchedWrite sched> {
3160  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3161                               sched>, VEX, TAPD, VEX_W;
3162  let Predicates = [HasDQI] in
3163  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3164                               sched>, VEX, TAPD;
3165  let Predicates = [HasBWI] in {
3166  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3167                               sched>, VEX, TAPD, VEX_W;
3168  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3169                               sched>, VEX, TAPD;
3170  }
3171}
3172
// Left shifts use opcodes 0x32/0x33, right shifts 0x30/0x31.
3173defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3174defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3175
3176// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Both Narrow operands are inserted into otherwise undefined Wide registers
// (INSERT_SUBREG into IMPLICIT_DEF at Narrow.SubRegIdx), the 512-bit compare
// InstStr#"Z..." is issued, and the resulting mask is copied to Narrow.KRC.
//   Frag    - compare fragment matched for the unmasked pattern.
//   Frag_su - fragment matched when the compare is ANDed with a mask; that
//             case selects the masked "Zrrik" form.
// $cc names the matched fragment; X86pcmpm_imm (defined elsewhere) is applied
// to it to form the instruction's immediate operand.
3177multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3178                                                 string InstStr,
3179                                                 X86VectorVTInfo Narrow,
3180                                                 X86VectorVTInfo Wide> {
// Unmasked register-register compare.
3181def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3182                                (Narrow.VT Narrow.RC:$src2), cond)),
3183          (COPY_TO_REGCLASS
3184           (!cast<Instruction>(InstStr#"Zrri")
3185            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3186            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3187            (X86pcmpm_imm $cc)), Narrow.KRC)>;
3188
// (and mask, compare): the narrow mask is copied to Wide.KRC and passed as
// the first operand of the masked instruction form.
3189def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3190                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3191                                                    (Narrow.VT Narrow.RC:$src2),
3192                                                    cond)))),
3193          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3194           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3195           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3196           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3197           (X86pcmpm_imm $cc)), Narrow.KRC)>;
3198}
3199
// Broadcast-load variants of the narrow integer compare lowering: the
// register operand is widened through INSERT_SUBREG into IMPLICIT_DEF, the
// other operand is a broadcast load (Narrow.BroadcastLdFrag) folded as
// addr:$src2, and the 512-bit "Zrmib" / "Zrmibk" forms are used. The result
// mask is copied to Narrow.KRC. When the broadcast load is the FIRST compare
// operand, the same instruction is used with X86pcmpm_imm_commute applied to
// $cc instead of X86pcmpm_imm.
3200multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3201                                                     string InstStr,
3202                                                     X86VectorVTInfo Narrow,
3203                                                     X86VectorVTInfo Wide> {
3204// Broadcast load.
3205def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3206                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3207          (COPY_TO_REGCLASS
3208           (!cast<Instruction>(InstStr#"Zrmib")
3209            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3210            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3211
// Broadcast load, ANDed with a mask (masked instruction form).
3212def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3213                           (Narrow.KVT
3214                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3215                                         (Narrow.BroadcastLdFrag addr:$src2),
3216                                         cond)))),
3217          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3218           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3219           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3220           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3221
3222// Commuted with broadcast load.
3223def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3224                                (Narrow.VT Narrow.RC:$src1),
3225                                cond)),
3226          (COPY_TO_REGCLASS
3227           (!cast<Instruction>(InstStr#"Zrmib")
3228            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3229            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3230
// Commuted with broadcast load, ANDed with a mask.
3231def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3232                           (Narrow.KVT
3233                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3234                                         (Narrow.VT Narrow.RC:$src1),
3235                                         cond)))),
3236          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3237           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3238           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3239           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3240}
3241
3242// Same as above, but for fp types which don't use PatFrags.
// Matches X86cmpm / X86cmpm_su directly with the condition code as timm:$cc.
// Narrow register operands are widened through INSERT_SUBREG into
// IMPLICIT_DEF, the 512-bit instruction InstStr#"Z..." is used, and the mask
// result is copied to Narrow.KRC. Covered forms: register-register
// ("Zrri"/"Zrrik"), broadcast load as second operand ("Zrmbi"/"Zrmbik"), and
// broadcast load as first operand (same instructions, with
// X86cmpm_imm_commute applied to the condition code).
// Note the broadcast suffixes here are "rmbi"/"rmbik", whereas the integer
// multiclass above uses "rmib"/"rmibk".
3243multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3244                                                X86VectorVTInfo Narrow,
3245                                                X86VectorVTInfo Wide> {
// Unmasked register-register compare.
3246def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3247                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3248          (COPY_TO_REGCLASS
3249           (!cast<Instruction>(InstStr#"Zrri")
3250            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3251            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3252            timm:$cc), Narrow.KRC)>;
3253
// Register-register compare ANDed with a mask.
3254def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3255                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3256                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3257          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3258           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3259           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3260           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3261           timm:$cc), Narrow.KRC)>;
3262
3263// Broadcast load.
3264def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3265                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3266          (COPY_TO_REGCLASS
3267           (!cast<Instruction>(InstStr#"Zrmbi")
3268            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3269            addr:$src2, timm:$cc), Narrow.KRC)>;
3270
// Broadcast load, ANDed with a mask.
3271def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3272                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3273                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3274          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3275           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3276           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3277           addr:$src2, timm:$cc), Narrow.KRC)>;
3278
3279// Commuted with broadcast load.
3280def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3281                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3282          (COPY_TO_REGCLASS
3283           (!cast<Instruction>(InstStr#"Zrmbi")
3284            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3285            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3286
// Commuted with broadcast load, ANDed with a mask.
3287def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3288                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3289                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3290          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3291           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3292           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3293           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3294}
3295
// AVX512 without VLX: route 128/256-bit dword/qword integer compares and
// f32/f64 compares through the 512-bit instructions. Each group below pairs
// the signed (X86pcmpm) and unsigned (X86pcmpum) fragments for one narrow
// type; the first eight defm's cover register operands, the next eight the
// broadcast-load forms, and the last four the floating-point compares.
3296let Predicates = [HasAVX512, NoVLX] in {
3297  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3298  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3299
3300  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3301  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3302
3303  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3304  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3305
3306  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3307  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3308
3309  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3310  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3311
3312  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3313  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3314
3315  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3316  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3317
3318  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3319  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3320
3321  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3322  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3323  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3324  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3325}
3326
// BWI without VLX: the same widening for 128/256-bit byte and word integer
// compares. Only the register-register multiclass is instantiated here; no
// broadcast-load (rmb) variants are listed for byte/word types.
3327let Predicates = [HasBWI, NoVLX] in {
3328  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3329  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3330
3331  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3332  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3333
3334  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3335  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3336
3337  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3338  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3339}
3340
3341// Mask setting all 0s or 1s
// Defines a pseudo instruction (opcode 0, Pseudo format, empty asm string,
// no inputs) that produces the constant `Val` of type VT in KRC. It is marked
// rematerializable and as cheap as a move, uses the WriteZero scheduling
// class, and is gated on HasAVX512.
3342multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3343  let Predicates = [HasAVX512] in
3344    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3345        SchedRW = [WriteZero] in
3346      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3347                     [(set KRC:$dst, (VT Val))]>;
3348}
3349
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types
// (W/D/Q). There is no B form here.
3350multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3351  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3352  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3353  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3354}
3355
// KSET0{W,D,Q} produce an all-zeros mask, KSET1{W,D,Q} an all-ones mask.
3356defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3357defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3358
3359// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants of the narrow mask types (v8i1 down to v1i1)
// are produced with the 16-bit KSET0W / KSET1W pseudos and then copied to
// the narrow register class.
3360let Predicates = [HasAVX512] in {
3361  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3362  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3363  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3364  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3365  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3366  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3367  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3368  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3369}
3370
3371// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are plain register-class copies:
//   - extracting the subVT subvector at index 0 of a VT mask copies RC -> subRC;
//   - inserting a subVT mask at index 0 of an undef VT vector copies subRC -> RC.
// Only index 0 and an undef insertion base are handled by this multiclass.
3372multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3373                                             RegisterClass RC, ValueType VT> {
3374  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3375            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3376
3377  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3378            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3379}
// One instantiation for every (narrower, wider) pair of mask types from
// v1i1 up to v64i1, grouped by the narrower type.
3380defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3381defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3382defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3383defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3384defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3385defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3386
3387defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3388defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3389defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3390defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3391defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3392
3393defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3394defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3395defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3396defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3397
3398defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3399defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3400defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3401
3402defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3403defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3404
3405defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3406
3407//===----------------------------------------------------------------------===//
3408// AVX-512 - Aligned and unaligned load and store
3409//
3410
// Defines the register-source and memory-source forms of one EVEX move opcode
// for a single vector type: rr, rrkz, rm, rrk, rmk and rmkz, followed by
// patterns that map the masked-load fragment onto rmk/rmkz.
//   opc          - opcode byte shared by all forms.
//   OpcodeStr    - mnemonic used to build the assembly strings.
//   Name         - base record name; combined with _.ZSuffix in the !cast
//                  expressions that look up the rmk/rmkz records.
//   _            - vector type description (RC, KRCWM, MemOp, VT, ...).
//   ld_frag      - load fragment used in the rm/rmk/rmkz patterns.
//   mload        - masked-load fragment used in the trailing Pat records.
//   Sched        - supplies the RR and RM scheduling writes.
//   EVEX2VEXOvrd - string prefix handed to EVEX2VEXOverride for rr and rm.
//   NoRMPattern  - when set, rm is defined with an empty pattern list.
//   SelectOprr   - select operator used by the register forms rrk/rrkz
//                  (defaults to vselect). The memory forms rmk/rmkz always
//                  use vselect_mask.
3411multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3412                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3413                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3414                       bit NoRMPattern = 0,
3415                       SDPatternOperator SelectOprr = vselect> {
3416  let hasSideEffects = 0 in {
  // Unmasked register-to-register move; defined without a pattern.
3417  let isMoveReg = 1 in
3418  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3419                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3420                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3421                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked register move: select between $src and the all-zeros vector.
3422  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3423                      (ins _.KRCWM:$mask,  _.RC:$src),
3424                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3425                       "${dst} {${mask}} {z}, $src}"),
3426                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3427                                           (_.VT _.RC:$src),
3428                                           _.ImmAllZerosV)))], _.ExeDomain>,
3429                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3430
  // Unmasked load. The load flags below are attached to this record only.
3431  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3432  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3433                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3434                    !if(NoRMPattern, [],
3435                        [(set _.RC:$dst,
3436                          (_.VT (ld_frag addr:$src)))]),
3437                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3438                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3439
  // Merge-masked forms: $src0 is tied to $dst and supplies the lanes that the
  // select takes from its third operand.
3440  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3441    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3442                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3443                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3444                      "${dst} {${mask}}, $src1}"),
3445                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3446                                          (_.VT _.RC:$src1),
3447                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3448                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3449    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3450                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3451                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3452                      "${dst} {${mask}}, $src1}"),
3453                     [(set _.RC:$dst, (_.VT
3454                         (vselect_mask _.KRCWM:$mask,
3455                          (_.VT (ld_frag addr:$src1)),
3456                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3457                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3458  }
  // Zero-masked load: select between the loaded value and all-zeros.
3459  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3460                  (ins _.KRCWM:$mask, _.MemOp:$src),
3461                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3462                                "${dst} {${mask}} {z}, $src}",
3463                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3464                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3465                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3466  }
  // Masked load with an undef pass-through value: use the zero-masking form.
3467  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3468            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3469
  // Masked load with an all-zeros pass-through value: zero-masking form.
3470  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3471            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3472
  // Masked load with a register pass-through value: merge-masking form.
3473  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3474            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3475             _.KRCWM:$mask, addr:$ptr)>;
3476}
3477
// Instantiates avx512_load for the 512-, 256- and 128-bit members of a vector
// type family, using each member's AlignedLdFrag and masked_load_aligned.
//   prd          - predicate required by all three widths; the 256/128-bit
//                  instantiations additionally require HasVLX.
//   Sched        - per-width scheduling info (ZMM/YMM/XMM members are used).
//   EVEX2VEXOvrd - override prefix: passed unchanged for Z128, with "Y"
//                  appended for Z256, and replaced by "" for Z.
//   NoRMPattern  - forwarded to avx512_load.
// SelectOprr is not forwarded, so avx512_load's default (vselect) applies.
3478multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3479                                 AVX512VLVectorVTInfo _, Predicate prd,
3480                                 X86SchedWriteMoveLSWidths Sched,
3481                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3482  let Predicates = [prd] in
3483  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3484                       _.info512.AlignedLdFrag, masked_load_aligned,
3485                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3486
3487  let Predicates = [prd, HasVLX] in {
3488  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3489                          _.info256.AlignedLdFrag, masked_load_aligned,
3490                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3491  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3492                          _.info128.AlignedLdFrag, masked_load_aligned,
3493                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3494  }
3495}
3496
// Instantiates avx512_load for the 512-, 256- and 128-bit members of a vector
// type family, using each member's LdFrag and masked_load.
//   prd          - predicate required by all three widths; the 256/128-bit
//                  instantiations additionally require HasVLX.
//   Sched        - per-width scheduling info (ZMM/YMM/XMM members are used).
//   EVEX2VEXOvrd - override prefix: passed unchanged for Z128, with "Y"
//                  appended for Z256, and replaced by "" for Z.
//   NoRMPattern  - forwarded to avx512_load.
//   SelectOprr   - forwarded to avx512_load for the rrk/rrkz patterns.
3497multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3498                          AVX512VLVectorVTInfo _, Predicate prd,
3499                          X86SchedWriteMoveLSWidths Sched,
3500                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
3501                          SDPatternOperator SelectOprr = vselect> {
3502  let Predicates = [prd] in
3503  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3504                       masked_load, Sched.ZMM, "",
3505                       NoRMPattern, SelectOprr>, EVEX_V512;
3506
3507  let Predicates = [prd, HasVLX] in {
3508  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3509                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3510                         NoRMPattern, SelectOprr>, EVEX_V256;
3511  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3512                         masked_load, Sched.XMM, EVEX2VEXOvrd,
3513                         NoRMPattern, SelectOprr>, EVEX_V128;
3514  }
3515}
3516
// Defines the store-direction forms of one EVEX move opcode for a single
// vector type: the register-destination encodings rr_REV, rrk_REV and
// rrkz_REV, the memory stores mr and mrk, a masked-store pattern and the
// ".s" assembler aliases.
//   opc          - opcode byte shared by all forms.
//   OpcodeStr    - mnemonic used to build the assembly strings.
//   BaseName     - base record name; combined with _.ZSuffix to name the
//                  FoldGenData targets and to !cast the mrk/_REV records.
//   _            - vector type description (RC, KRCWM, MemOp, VT, ...).
//   st_frag      - store fragment used in the mr pattern.
//   mstore       - masked-store fragment used in the trailing Pat record.
//   Sched        - supplies the RR and MR scheduling writes.
//   EVEX2VEXOvrd - string prefix handed to EVEX2VEXOverride for rr_REV and mr.
//   NoMRPattern  - when set, mr is defined with an empty pattern list.
3517multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3518                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3519                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3520                        bit NoMRPattern = 0> {
  // Register-to-register moves encoded with the MRMDestReg form. They carry
  // no patterns and are marked isCodeGenOnly and ForceDisassemble; each one
  // names its non-_REV counterpart through FoldGenData.
3521  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3522  let isMoveReg = 1 in
3523  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3524                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3525                         [], _.ExeDomain>, EVEX,
3526                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3527                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3528  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3529                         (ins _.KRCWM:$mask, _.RC:$src),
3530                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3531                         "${dst} {${mask}}, $src}",
3532                         [], _.ExeDomain>,  EVEX, EVEX_K,
3533                         FoldGenData<BaseName#_.ZSuffix#rrk>,
3534                         Sched<[Sched.RR]>;
3535  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3536                          (ins _.KRCWM:$mask, _.RC:$src),
3537                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3538                          "${dst} {${mask}} {z}, $src}",
3539                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3540                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
3541                          Sched<[Sched.RR]>;
3542  }
3543
  // Unmasked store. The let below has no braces, so it covers mr only.
3544  let hasSideEffects = 0, mayStore = 1 in
3545  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3546                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3547                    !if(NoMRPattern, [],
3548                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3549                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3550                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked store. It has an empty inline pattern list; selection goes through
  // the mstore Pat record below.
  // NOTE(review): mayStore is not set explicitly here - presumably it is
  // inferred from that Pat record; confirm.
3551  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3552                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3553              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3554               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3555               NotMemoryFoldable;
3556
  // Masked store of a register value selects the mrk form.
3557  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3558           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3559                                                        _.KRCWM:$mask, _.RC:$src)>;
3560
  // Assembler aliases: the mnemonic with a ".s" suffix maps to the _REV
  // records defined above.
  // NOTE(review): the trailing 0 presumably keeps the alias from being used
  // when printing - confirm against the InstAlias definition.
3561  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3562                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3563                   _.RC:$dst, _.RC:$src), 0>;
3564  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3565                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3566                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3567  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3568                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3569                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3570}
3571
// Instantiates avx512_store for the 512-, 256- and 128-bit members of a
// vector type family, using the store and masked_store fragments.
//   prd          - predicate required by all three widths; the 256/128-bit
//                  instantiations additionally require HasVLX.
//   Sched        - per-width scheduling info (ZMM/YMM/XMM members are used).
//   EVEX2VEXOvrd - override prefix: passed unchanged for Z128, with "Y"
//                  appended for Z256, and replaced by "" for Z.
//   NoMRPattern  - forwarded to avx512_store.
3572multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3573                            AVX512VLVectorVTInfo _, Predicate prd,
3574                            X86SchedWriteMoveLSWidths Sched,
3575                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3576  let Predicates = [prd] in
3577  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3578                        masked_store, Sched.ZMM, "",
3579                        NoMRPattern>, EVEX_V512;
3580  let Predicates = [prd, HasVLX] in {
3581    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3582                             masked_store, Sched.YMM,
3583                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3584    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3585                             masked_store, Sched.XMM, EVEX2VEXOvrd,
3586                             NoMRPattern>, EVEX_V128;
3587  }
3588}
3589
// Instantiates avx512_store for the 512-, 256- and 128-bit members of a
// vector type family, using the alignedstore and masked_store_aligned
// fragments.
//   prd          - predicate required by all three widths; the 256/128-bit
//                  instantiations additionally require HasVLX.
//   Sched        - per-width scheduling info (ZMM/YMM/XMM members are used).
//   EVEX2VEXOvrd - override prefix: passed unchanged for Z128, with "Y"
//                  appended for Z256, and replaced by "" for Z.
//   NoMRPattern  - forwarded to avx512_store.
3590multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3591                                  AVX512VLVectorVTInfo _, Predicate prd,
3592                                  X86SchedWriteMoveLSWidths Sched,
3593                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3594  let Predicates = [prd] in
3595  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3596                        masked_store_aligned, Sched.ZMM, "",
3597                        NoMRPattern>, EVEX_V512;
3598
3599  let Predicates = [prd, HasVLX] in {
3600    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3601                             masked_store_aligned, Sched.YMM,
3602                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3603    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3604                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3605                             NoMRPattern>, EVEX_V128;
3606  }
3607}
3608
// Floating-point vector moves. Each defm combines a load multiclass and a
// store multiclass under one base name, all predicated on HasAVX512 and
// scheduled with SchedWriteFMoveLS.

// Aligned f32 moves: load opcode 0x28, store opcode 0x29.
3609defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3610                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3611               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3612                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3613               PS, EVEX_CD8<32, CD8VF>;
3614
// Aligned f64 moves: same opcodes, with the PD prefix and VEX_W.
3615defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3616                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3617               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3618                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3619               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3620
// Unaligned f32 moves: load opcode 0x10, store opcode 0x11. null_frag is
// passed as SelectOprr, i.e. as the select operator of the rrk/rrkz patterns.
3621defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3622                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3623               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3624                               SchedWriteFMoveLS, "VMOVUPS">,
3625                               PS, EVEX_CD8<32, CD8VF>;
3626
// Unaligned f64 moves: same opcodes, with the PD prefix and VEX_W; null_frag
// is again passed as SelectOprr.
3627defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3628                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3629               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3630                               SchedWriteFMoveLS, "VMOVUPD">,
3631               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3632
// Integer vector moves. All use load opcode 0x6F and store opcode 0x7F with
// SchedWriteVecMoveLS; the variants differ in prefix (PD/XD/XS), VEX_W,
// element size and predicate.

// Aligned 32-bit element moves. The trailing 1 sets NoRMPattern/NoMRPattern,
// so the plain rm and mr records get empty pattern lists.
3633defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3634                                       HasAVX512, SchedWriteVecMoveLS,
3635                                       "VMOVDQA", 1>,
3636                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3637                                        HasAVX512, SchedWriteVecMoveLS,
3638                                        "VMOVDQA", 1>,
3639                 PD, EVEX_CD8<32, CD8VF>;
3640
// Aligned 64-bit element moves; rm and mr keep their patterns (default 0).
3641defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3642                                       HasAVX512, SchedWriteVecMoveLS,
3643                                       "VMOVDQA">,
3644                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3645                                        HasAVX512, SchedWriteVecMoveLS,
3646                                        "VMOVDQA">,
3647                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3648
// Unaligned 8-bit element moves; require HasBWI, rm/mr patterns suppressed.
3649defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3650                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
3651                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3652                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3653                XD, EVEX_CD8<8, CD8VF>;
3654
// Unaligned 16-bit element moves; require HasBWI, rm/mr patterns suppressed.
3655defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3656                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3657                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3658                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3659                 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3660
// Unaligned 32-bit element moves; rm/mr patterns suppressed and null_frag
// passed as SelectOprr for the rrk/rrkz patterns.
3661defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3662                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3663                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3664                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3665                 XS, EVEX_CD8<32, CD8VF>;
3666
// Unaligned 64-bit element moves; rm/mr keep their patterns, and null_frag is
// passed as SelectOprr for the rrk/rrkz patterns.
3667defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3668                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3669                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3670                                 SchedWriteVecMoveLS, "VMOVDQU">,
3671                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3672
3673// Special instructions to help with spilling when we don't have VLX. We need
3674// to load or store from a ZMM register instead. These are converted in
3675// expandPostRAPseudos.
// Load pseudos for 128-bit (VR128X) and 256-bit (VR256X) destinations, in
// aligned (VMOVAPS) and unaligned (VMOVUPS) flavours. Each has opcode 0, the
// Pseudo format, an empty assembly string and no pattern.
3676let isReMaterializable = 1, canFoldAsLoad = 1,
3677    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3678def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3679                            "", []>, Sched<[WriteFLoadX]>;
3680def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3681                            "", []>, Sched<[WriteFLoadY]>;
3682def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3683                            "", []>, Sched<[WriteFLoadX]>;
3684def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3685                            "", []>, Sched<[WriteFLoadY]>;
3686}
3687
// Store pseudos for 128-bit (VR128X) and 256-bit (VR256X) sources, in aligned
// (VMOVAPS) and unaligned (VMOVUPS) flavours. Each has opcode 0, the Pseudo
// format, an empty assembly string and no pattern.
3688let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3689def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3690                            "", []>, Sched<[WriteFStoreX]>;
3691def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3692                            "", []>, Sched<[WriteFStoreY]>;
3693def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3694                            "", []>, Sched<[WriteFStoreX]>;
3695def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3696                            "", []>, Sched<[WriteFStoreY]>;
3697}
3698
// Selects whose true operand is the all-zeros vector: invert the mask with
// KNOTWrr and use the zero-masking register move on the false operand.
// The v8i64 form copies the 8-bit mask to VK16 for KNOTWrr and back to VK8.
3699def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3700                          (v8i64 VR512:$src))),
3701   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3702                                              VK8), VR512:$src)>;
3703
// The v16i32 form applies KNOTWrr to the 16-bit mask directly.
3704def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3705                           (v16i32 VR512:$src))),
3706                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3707
3708// These patterns exist to prevent the above patterns from introducing a second
3709// mask inversion when one already exists.
// When the select condition is already (vnot mask), the un-inverted mask is
// fed straight to the zero-masking move and no KNOTWrr is emitted.
3710def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3711                          (v8i64 immAllZerosV),
3712                          (v8i64 VR512:$src))),
3713                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3714def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3715                           (v16i32 immAllZerosV),
3716                           (v16i32 VR512:$src))),
3717                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3718
// Lowers a masked select on a Narrow vector type by performing it on the Wide
// type and extracting the Narrow subregister from the result.
//   InstrStr - base name of the wide instruction; "rrk" or "rrkz" is appended
//              and the result is looked up with !cast.
//   Narrow   - type description of the select being matched.
//   Wide     - type description of the instruction actually emitted.
// Narrow operands are placed into a Wide register with INSERT_SUBREG on an
// IMPLICIT_DEF, and the mask is copied from Narrow.KRCWM to Wide.KRCWM.
3719multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3720                              X86VectorVTInfo Wide> {
 // Select between two registers: merge-masking form, $src0 as pass-through.
3721 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3722                               Narrow.RC:$src1, Narrow.RC:$src0)),
3723           (EXTRACT_SUBREG
3724            (Wide.VT
3725             (!cast<Instruction>(InstrStr#"rrk")
3726              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3727              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3728              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3729            Narrow.SubRegIdx)>;
3730
 // Select between a register and all-zeros: zero-masking form.
3731 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3732                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3733           (EXTRACT_SUBREG
3734            (Wide.VT
3735             (!cast<Instruction>(InstrStr#"rrkz")
3736              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3737              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3738            Narrow.SubRegIdx)>;
3739}
3740
3741// Patterns for handling masked selects of 128-bit and 256-bit vectors when
3742// VLX isn't available. Use a 512-bit operation and extract.
// 32- and 64-bit element selects under [HasAVX512, NoVLX]. Each narrow type
// (the 128- and 256-bit *x_info records) is paired with the 512-bit type of
// the same element type; FP types use VMOVAPS/VMOVAPD and integer types use
// VMOVDQA32/VMOVDQA64.
3743let Predicates = [HasAVX512, NoVLX] in {
3744  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3745  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3746  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3747  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3748
3749  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3750  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3751  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3752  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3753}
3754
// 8- and 16-bit element selects under [HasBWI, NoVLX]. i8 types use
// VMOVDQU8Z; i16 and f16 types both use VMOVDQU16Z.
3755let Predicates = [HasBWI, NoVLX] in {
3756  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3757  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3758
3759  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3760  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3761
3762  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3763  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3764}
3765
// Unmasked 512-bit loads and stores for v16i32, v32i16, v64i8 and v32f16.
// The integer types are selected to the 64-bit element moves (VMOVDQA64Z when
// aligned, VMOVDQU64Z otherwise); v32f16 is selected to VMOVAPSZ/VMOVUPSZ.
3766let Predicates = [HasAVX512] in {
3767  // 512-bit load.
3768  def : Pat<(alignedloadv16i32 addr:$src),
3769            (VMOVDQA64Zrm addr:$src)>;
3770  def : Pat<(alignedloadv32i16 addr:$src),
3771            (VMOVDQA64Zrm addr:$src)>;
3772  def : Pat<(alignedloadv32f16 addr:$src),
3773            (VMOVAPSZrm addr:$src)>;
3774  def : Pat<(alignedloadv64i8 addr:$src),
3775            (VMOVDQA64Zrm addr:$src)>;
3776  def : Pat<(loadv16i32 addr:$src),
3777            (VMOVDQU64Zrm addr:$src)>;
3778  def : Pat<(loadv32i16 addr:$src),
3779            (VMOVDQU64Zrm addr:$src)>;
3780  def : Pat<(loadv32f16 addr:$src),
3781            (VMOVUPSZrm addr:$src)>;
3782  def : Pat<(loadv64i8 addr:$src),
3783            (VMOVDQU64Zrm addr:$src)>;
3784
3785  // 512-bit store.
3786  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3787            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3788  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3789            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3790  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3791            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3792  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3793            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3794  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3795            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3796  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3797            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3798  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3799            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3800  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3801            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3802}
3803
// Unmasked 128-bit and 256-bit loads and stores under HasVLX, covering the
// i32, i16, i8 and f16 element types at each width. The integer types are
// selected to the 64-bit element moves (VMOVDQA64Z128/256 when aligned,
// VMOVDQU64Z128/256 otherwise); the f16 types are selected to
// VMOVAPSZ128/256 and VMOVUPSZ128/256.
3804let Predicates = [HasVLX] in {
3805  // 128-bit load.
3806  def : Pat<(alignedloadv4i32 addr:$src),
3807            (VMOVDQA64Z128rm addr:$src)>;
3808  def : Pat<(alignedloadv8i16 addr:$src),
3809            (VMOVDQA64Z128rm addr:$src)>;
3810  def : Pat<(alignedloadv8f16 addr:$src),
3811            (VMOVAPSZ128rm addr:$src)>;
3812  def : Pat<(alignedloadv16i8 addr:$src),
3813            (VMOVDQA64Z128rm addr:$src)>;
3814  def : Pat<(loadv4i32 addr:$src),
3815            (VMOVDQU64Z128rm addr:$src)>;
3816  def : Pat<(loadv8i16 addr:$src),
3817            (VMOVDQU64Z128rm addr:$src)>;
3818  def : Pat<(loadv8f16 addr:$src),
3819            (VMOVUPSZ128rm addr:$src)>;
3820  def : Pat<(loadv16i8 addr:$src),
3821            (VMOVDQU64Z128rm addr:$src)>;
3822
3823  // 128-bit store.
3824  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3825            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3826  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3827            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3828  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3829            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3830  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3831            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3832  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3833            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3834  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3835            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3836  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3837            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3838  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3839            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3840
3841  // 256-bit load.
3842  def : Pat<(alignedloadv8i32 addr:$src),
3843            (VMOVDQA64Z256rm addr:$src)>;
3844  def : Pat<(alignedloadv16i16 addr:$src),
3845            (VMOVDQA64Z256rm addr:$src)>;
3846  def : Pat<(alignedloadv16f16 addr:$src),
3847            (VMOVAPSZ256rm addr:$src)>;
3848  def : Pat<(alignedloadv32i8 addr:$src),
3849            (VMOVDQA64Z256rm addr:$src)>;
3850  def : Pat<(loadv8i32 addr:$src),
3851            (VMOVDQU64Z256rm addr:$src)>;
3852  def : Pat<(loadv16i16 addr:$src),
3853            (VMOVDQU64Z256rm addr:$src)>;
3854  def : Pat<(loadv16f16 addr:$src),
3855            (VMOVUPSZ256rm addr:$src)>;
3856  def : Pat<(loadv32i8 addr:$src),
3857            (VMOVDQU64Z256rm addr:$src)>;
3858
3859  // 256-bit store.
3860  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3861            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3862  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3863            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3864  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3865            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3866  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3867            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3868  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3869            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3870  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3871            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3872  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3873            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3874  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3875            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3876}
// Masked operations on v32f16 under HasBWI, all selected to the VMOVDQU16Z
// forms: register selects (rrk/rrkz), selects whose true operand is a load
// (rmk/rmkz, used for both aligned and unaligned loads), masked loads
// (rmk/rmkz) and masked stores (mrk).
3877let Predicates = [HasBWI] in {
  // Register-register selects: merge with $src0, or zero the unselected lanes.
3878  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
3879            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3880  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
3881            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  // Selects whose true operand is an aligned load.
3882  def : Pat<(v32f16 (vselect VK32WM:$mask,
3883                     (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3884            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3885  def : Pat<(v32f16 (vselect VK32WM:$mask,
3886                     (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3887            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  // Selects whose true operand is an unaligned load.
3888  def : Pat<(v32f16 (vselect VK32WM:$mask,
3889                     (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3890            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3891  def : Pat<(v32f16 (vselect VK32WM:$mask,
3892                     (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3893            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  // Masked loads: register pass-through uses rmk; undef or all-zeros
  // pass-through uses rmkz.
3894  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
3895            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3896  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
3897            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3898  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
3899            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3900
  // Masked store.
3901  def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
3902            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3903}
// Masked operations on v16f16 (VR256X, VK16WM) and v8f16 (VR128X, VK8WM)
// under [HasBWI, HasVLX], selected to the VMOVDQU16Z256 and VMOVDQU16Z128
// forms. Each width gets the same set: register selects (rrk/rrkz), selects
// whose true operand is an aligned or unaligned load (rmk/rmkz), masked loads
// (rmk/rmkz) and a masked store (mrk).
3904let Predicates = [HasBWI, HasVLX] in {
  // --- v16f16 (256-bit) ---
3905  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
3906            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3907  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
3908            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3909  def : Pat<(v16f16 (vselect VK16WM:$mask,
3910                     (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3911            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3912  def : Pat<(v16f16 (vselect VK16WM:$mask,
3913                     (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3914            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3915  def : Pat<(v16f16 (vselect VK16WM:$mask,
3916                     (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3917            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3918  def : Pat<(v16f16 (vselect VK16WM:$mask,
3919                     (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3920            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3921  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
3922            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3923  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
3924            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3925  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
3926            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3927
3928  def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
3929            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3930
  // --- v8f16 (128-bit) ---
3931  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
3932            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3933  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
3934            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3935  def : Pat<(v8f16 (vselect VK8WM:$mask,
3936                     (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3937            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3938  def : Pat<(v8f16 (vselect VK8WM:$mask,
3939                     (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3940            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3941  def : Pat<(v8f16 (vselect VK8WM:$mask,
3942                     (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3943            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3944  def : Pat<(v8f16 (vselect VK8WM:$mask,
3945                     (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3946            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3947  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
3948            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3949  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
3950            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3951  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
3952            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3953
3954  def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
3955            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3956}
3957
3958// Move Int Doubleword to Packed Double Int
3959//
// EVEX moves between general-purpose registers or memory and vector/scalar
// FP registers, all in the SSEPackedInt execution domain. Opcode 0x6E is used
// for moves into the vector/FP register and 0x7E for the move out to GR64.
3960let ExeDomain = SSEPackedInt in {
// vmovd GR32 -> low element of a v4i32 (scalar_to_vector).
3961def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3962                      "vmovd\t{$src, $dst|$dst, $src}",
3963                      [(set VR128X:$dst,
3964                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3965                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd from a 32-bit memory operand (scalar_to_vector of loadi32).
3966def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3967                      "vmovd\t{$src, $dst|$dst, $src}",
3968                      [(set VR128X:$dst,
3969                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3970                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq GR64 -> low element of a v2i64 (scalar_to_vector).
3971def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3972                      "vmovq\t{$src, $dst|$dst, $src}",
3973                        [(set VR128X:$dst,
3974                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3975                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// vmovq from a 64-bit memory operand. No pattern; marked isCodeGenOnly and
// ForceDisassemble, with mayLoad set explicitly.
3976let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3977def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3978                      (ins i64mem:$src),
3979                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3980                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitconverts between GR64 and FR64X in both directions.
3981let isCodeGenOnly = 1 in {
3982def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3983                       "vmovq\t{$src, $dst|$dst, $src}",
3984                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3985                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3986def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3987                         "vmovq\t{$src, $dst|$dst, $src}",
3988                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3989                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3990}
3991} // ExeDomain = SSEPackedInt
3992
3993// Move Int Doubleword to Single Scalar
3994//
// Codegen-only vmovd record used to select a 32-bit bitconvert from GR32 to
// the scalar register class FR32X.
3995let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3996def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3997                      "vmovd\t{$src, $dst|$dst, $src}",
3998                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3999                      EVEX, Sched<[WriteVecMoveFromGpr]>;
4000} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4001
4002// Move doubleword from xmm register to r/m32
4003//
// Both records match extraction of element 0 of a v4i32 held in VR128X
// (opcode 0x7E); they differ only in the destination.
4004let ExeDomain = SSEPackedInt in {
// Register destination: element 0 -> GR32.
4005def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4006                       "vmovd\t{$src, $dst|$dst, $src}",
4007                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4008                                        (iPTR 0)))]>,
4009                       EVEX, Sched<[WriteVecMoveToGpr]>;
// Memory destination: element 0 is stored to an i32mem operand.
4010def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
4011                       (ins i32mem:$dst, VR128X:$src),
4012                       "vmovd\t{$src, $dst|$dst, $src}",
4013                       [(store (i32 (extractelt (v4i32 VR128X:$src),
4014                                     (iPTR 0))), addr:$dst)]>,
4015                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4016} // ExeDomain = SSEPackedInt
4017
4018// Move quadword from xmm1 register to r/m64
4019//
// EVEX vmovq records that take the low 64 bits of a VR128X source. Two
// opcodes appear here: 0x7E (GR64 / i64mem destination) and 0xD6 (i64mem /
// VR128X destination). The memory forms carry EVEX_CD8<64, CD8VT1>, matching
// the i64mem load record VMOV64toPQIZrm.
4020let ExeDomain = SSEPackedInt in {
// Register destination: selected for extraction of element 0 of a v2i64.
4021def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4022                      "vmovq\t{$src, $dst|$dst, $src}",
4023                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4024                                                   (iPTR 0)))]>,
4025                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
4026                      Requires<[HasAVX512]>;
4027
// Memory form of the 0x7E encoding. It has no selection pattern, so mayStore
// and hasSideEffects are stated explicitly. EVEX_CD8<64, CD8VT1> gives this
// 64-bit memory operand the same compressed-displacement scale as the other
// i64mem records in this section.
4028let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4029def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4030                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4031                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
4032                      Requires<[HasAVX512, In64BitMode]>;
4033
// 0xD6 store form: selected for a store of element 0 of a v2i64.
4034def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4035                      (ins i64mem:$dst, VR128X:$src),
4036                      "vmovq\t{$src, $dst|$dst, $src}",
4037                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4038                              addr:$dst)]>,
4039                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
4040                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4041
// 0xD6 register-to-register form. No pattern; codegen-only with
// ForceDisassemble set. The "vmovq.s" InstAlias refers to this record.
4042let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4043def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4044                             (ins VR128X:$src),
4045                             "vmovq\t{$src, $dst|$dst, $src}", []>,
4046                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
4047} // ExeDomain = SSEPackedInt
4048
// Assembler spelling "vmovq.s" for the 0xD6 register-to-register record.
// NOTE(review): the trailing 0 is the InstAlias emit argument; presumably it
// keeps the printer from using this alias -- confirm in Target.td.
4049def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4050                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4051
// Select an X86vextractstore64 of a v2i64 source to the 0xD6 vmovq store.
4052let Predicates = [HasAVX512] in {
4053  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4054            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
4055}
4056
4057// Move Scalar Single to Double Int
4058//
// Codegen-only vmovd record used to select a 32-bit bitconvert from the
// scalar register class FR32X to GR32.
4059let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4060def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4061                      (ins FR32X:$src),
4062                      "vmovd\t{$src, $dst|$dst, $src}",
4063                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4064                      EVEX, Sched<[WriteVecMoveToGpr]>;
4065} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4066
4067// Move Quadword Int to Packed Quadword Int
4068//
// i64 memory -> VR128X using the XS-prefixed 0x7E encoding (AVX512XSI);
// selected for (v2i64 (scalar_to_vector (loadi64 addr))).
// NOTE(review): the compressed-displacement mixin is written
// EVEX_CD8<8, CD8VT8> here, while the other i64mem records in this section
// use EVEX_CD8<64, CD8VT1>. Presumably both describe the same 8-byte scale --
// confirm against the EVEX_CD8 class definition.
4069let ExeDomain = SSEPackedInt in {
4070def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4071                      (ins i64mem:$src),
4072                      "vmovq\t{$src, $dst|$dst, $src}",
4073                      [(set VR128X:$dst,
4074                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4075                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4076} // ExeDomain = SSEPackedInt
4077
4078// Allow "vmovd" but print "vmovq".
// The two aliases accept the "vmovd" spelling for the 64-bit GPR <-> XMM
// register moves, one per direction.
4079def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4080                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4081def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4082                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4083
4084// Conversions between masks and scalar fp.
// Each bitconvert is selected as two moves that go through a GPR: the scalar
// FP <-> GPR record defined above, and a KMOV between the GPR and the mask
// register.
// 32-bit: FR32X <-> v32i1 (VK32) via KMOVD.
4085def : Pat<(v32i1 (bitconvert FR32X:$src)),
4086          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4087def : Pat<(f32 (bitconvert VK32:$src)),
4088          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4089
// 64-bit: FR64X <-> v64i1 (VK64) via KMOVQ.
4090def : Pat<(v64i1 (bitconvert FR64X:$src)),
4091          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4092def : Pat<(f64 (bitconvert VK64:$src)),
4093          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4094
4095//===----------------------------------------------------------------------===//
4096// AVX-512  MOVSH, MOVSS, MOVSD
4097//===----------------------------------------------------------------------===//
4098
// Defines the EVEX scalar-move records for one element type:
//   rr, rrk, rrkz          - register forms (opcode 0x10), plain / merge-masked
//                            / zero-masked
//   rm, rm_alt, rmk, rmkz  - load forms (opcode 0x10)
//   mr, mrk                - store forms (opcode 0x11)
// Parameters:
//   asm         - mnemonic prepended to every assembly string.
//   OpNode      - SDNode matched by the register forms.
//   vzload_frag - PatFrag matched by the rm load into _.RC.
//   _           - X86VectorVTInfo supplying RC, FRC, KRCWM, VT, ScalarMemOp,
//                 ScalarLdFrag, ImmAllZerosV and ExeDomain.
//   prd         - predicate guarding the records (defaults to HasAVX512).
4099multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4100                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
// The unmasked register form is guarded by [HasFP16] when prd is HasFP16 and
// by [prd, OptForSize] for any other prd.
4101  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
4102  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4103             (ins _.RC:$src1, _.RC:$src2),
4104             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4105             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4106             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Every remaining record is guarded by [prd] alone.
4107  let Predicates = [prd] in {
// Zero-masked register form: X86selects between the OpNode result and
// _.ImmAllZerosV.
4108  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4109              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4110              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4111              "$dst {${mask}} {z}, $src1, $src2}"),
4112              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4113                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4114                                      _.ImmAllZerosV)))],
4115              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked register form: X86selects between the OpNode result and the
// pass-through $src0, which is tied to $dst.
4116  let Constraints = "$src0 = $dst"  in
4117  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4118             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4119             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4120             "$dst {${mask}}, $src1, $src2}"),
4121             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4122                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4123                                     (_.VT _.RC:$src0))))],
4124             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
// Unmasked loads; both are marked foldable-as-load and rematerializable.
4125  let canFoldAsLoad = 1, isReMaterializable = 1 in {
// Load into the vector register class, matched through vzload_frag.
4126  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4127             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4128             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4129             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4130  // _alt version uses the scalar FP register class (_.FRC) and matches a
//    plain scalar load (_.ScalarLdFrag).
4131  let isCodeGenOnly = 1 in
4132  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4133                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4134                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4135                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4136  }
// Masked loads carry no pattern in this multiclass, so mayLoad and
// hasSideEffects are stated explicitly.
4137  let mayLoad = 1, hasSideEffects = 0 in {
// Merge-masked load; pass-through $src0 is tied to $dst.
4138    let Constraints = "$src0 = $dst" in
4139    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4140               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4141               !strconcat(asm, "\t{$src, $dst {${mask}}|",
4142               "$dst {${mask}}, $src}"),
4143               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
// Zero-masked load.
4144    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4145               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4146               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4147               "$dst {${mask}} {z}, $src}"),
4148               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4149  }
// Unmasked store of a scalar held in _.FRC.
4150  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4151             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4152             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4153             EVEX, Sched<[WriteFStore]>;
// Masked store: no pattern, so mayStore/hasSideEffects are explicit. The
// mask operand is spelled VK1WM directly, and the record is tagged
// NotMemoryFoldable.
4154  let mayStore = 1, hasSideEffects = 0 in
4155  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4156              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4157              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4158              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
4159              NotMemoryFoldable;
4160  }
4161}
4162
// Instantiate avx512_move_scalar for f32, f64 and f16. Each instantiation
// supplies its own prefix/map mixin (XS, XD + VEX_W, T_MAP5XS) and an
// EVEX_CD8 whose first argument is the element width in bits.
// f32: vmovss, default predicate.
4163defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4164                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4165
// f64: vmovsd, default predicate.
4166defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4167                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4168
// f16: vmovsh, guarded by HasFP16 (passed as prd).
4169defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4170                                  HasFP16>,
4171                                  VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
4172
// Patterns that fold a scalar X86selects feeding OpNode into the masked
// register forms of InstrStr.
//   InstrStr - record-name prefix; "rrk"/"rrkz" are appended and !cast to
//              Instruction.
//   OpNode   - the scalar-move SDNode.
//   ZeroFP   - PatLeaf matching the floating-point zero of the element type.
//   _        - X86VectorVTInfo of the vector type.
4173multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4174                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
4175
// select(mask, src1, src2) inserted into src0 -> rrk. The false value $src2
// becomes the pass-through operand; $src1 and $src2 are copied from _.FRC to
// _.RC first.
4176def : Pat<(_.VT (OpNode _.RC:$src0,
4177                        (_.VT (scalar_to_vector
4178                                  (_.EltVT (X86selects VK1WM:$mask,
4179                                                       (_.EltVT _.FRC:$src1),
4180                                                       (_.EltVT _.FRC:$src2))))))),
4181          (!cast<Instruction>(InstrStr#rrk)
4182                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4183                        VK1WM:$mask,
4184                        (_.VT _.RC:$src0),
4185                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4186
// Same shape with ZeroFP as the false value -> rrkz (no pass-through).
4187def : Pat<(_.VT (OpNode _.RC:$src0,
4188                        (_.VT (scalar_to_vector
4189                                  (_.EltVT (X86selects VK1WM:$mask,
4190                                                       (_.EltVT _.FRC:$src1),
4191                                                       (_.EltVT ZeroFP))))))),
4192          (!cast<Instruction>(InstrStr#rrkz)
4193                        VK1WM:$mask,
4194                        (_.VT _.RC:$src0),
4195                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4196}
4197
// Selects a 512-bit masked_store whose value is a 128-bit register inserted
// at index 0 of undef, and whose mask is the dag Mask, as the scalar masked
// store InstrStr#mrk.
//   Mask   - mask dag to match; it is expected to bind $mask.
//   MaskRC - register class of $mask; the register is copied straight to
//            VK1WM.
4198multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4199                                        dag Mask, RegisterClass MaskRC> {
4200
4201def : Pat<(masked_store
4202             (_.info512.VT (insert_subvector undef,
4203                               (_.info128.VT _.info128.RC:$src),
4204                               (iPTR 0))), addr:$dst, Mask),
4205          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4206                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4207                      _.info128.RC:$src)>;
4208
4209}
4210
// Variant of avx512_store_scalar_lowering for a $mask register that is not
// copied to VK1WM directly: it is first placed into an i32 IMPLICIT_DEF at
// sub-register index subreg via INSERT_SUBREG, and that i32 value is then
// copied to VK1WM.
4211multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4212                                               AVX512VLVectorVTInfo _,
4213                                               dag Mask, RegisterClass MaskRC,
4214                                               SubRegIndex subreg> {
4215
4216def : Pat<(masked_store
4217             (_.info512.VT (insert_subvector undef,
4218                               (_.info128.VT _.info128.RC:$src),
4219                               (iPTR 0))), addr:$dst, Mask),
4220          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4221                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4222                      _.info128.RC:$src)>;
4223
4224}
4225
4226// This matches the more recent codegen from clang that avoids emitting a 512
4227// bit masked store directly. Codegen will widen 128-bit masked store to 512
4228// bits on AVX512F only targets.
// Two mask dags are supplied: Mask512 for the widened 512-bit store and
// Mask128 for the 128-bit store. Both patterns emit the same InstrStr#mrk
// with $mask routed through an i32 INSERT_SUBREG at subreg.
4229multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4230                                               AVX512VLVectorVTInfo _,
4231                                               dag Mask512, dag Mask128,
4232                                               RegisterClass MaskRC,
4233                                               SubRegIndex subreg> {
4234
4235// AVX512F pattern.
4236def : Pat<(masked_store
4237             (_.info512.VT (insert_subvector undef,
4238                               (_.info128.VT _.info128.RC:$src),
4239                               (iPTR 0))), addr:$dst, Mask512),
4240          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4241                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4242                      _.info128.RC:$src)>;
4243
4244// AVX512VL pattern.
4245def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4246          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4247                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4248                      _.info128.RC:$src)>;
4249}
4250
// Selects the low 128 bits (extract_subvector at index 0) of a 512-bit
// masked_load with mask dag Mask as a scalar masked load of InstrStr.
// $mask (class MaskRC) is copied straight to VK1WM.
4251multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4252                                       dag Mask, RegisterClass MaskRC> {
4253
// All-zeros pass-through -> zero-masked load (rmkz).
4254def : Pat<(_.info128.VT (extract_subvector
4255                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4256                                        _.info512.ImmAllZerosV)),
4257                           (iPTR 0))),
4258          (!cast<Instruction>(InstrStr#rmkz)
4259                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4260                      addr:$srcAddr)>;
4261
// Pass-through is X86vzmovl of a 128-bit register inserted at index 0 of
// undef -> merge-masked load (rmk) with $src as the pass-through operand.
4262def : Pat<(_.info128.VT (extract_subvector
4263                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4264                      (_.info512.VT (insert_subvector undef,
4265                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4266                            (iPTR 0))))),
4267                (iPTR 0))),
4268          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4269                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4270                      addr:$srcAddr)>;
4271
4272}
4273
// Variant of avx512_load_scalar_lowering in which $mask is first placed into
// an i32 IMPLICIT_DEF at sub-register index subreg (INSERT_SUBREG) and that
// i32 value is then copied to VK1WM. The matched dags are otherwise the same.
4274multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4275                                              AVX512VLVectorVTInfo _,
4276                                              dag Mask, RegisterClass MaskRC,
4277                                              SubRegIndex subreg> {
4278
// All-zeros pass-through -> rmkz.
4279def : Pat<(_.info128.VT (extract_subvector
4280                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4281                                        _.info512.ImmAllZerosV)),
4282                           (iPTR 0))),
4283          (!cast<Instruction>(InstrStr#rmkz)
4284                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4285                      addr:$srcAddr)>;
4286
// X86vzmovl'd register pass-through -> rmk.
4287def : Pat<(_.info128.VT (extract_subvector
4288                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4289                      (_.info512.VT (insert_subvector undef,
4290                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4291                            (iPTR 0))))),
4292                (iPTR 0))),
4293          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4294                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4295                      addr:$srcAddr)>;
4296
4297}
4298
4299// This matches the more recent codegen from clang that avoids emitting a 512
4300// bit masked load directly. Codegen will widen 128-bit masked load to 512
4301// bits on AVX512F only targets.
// Mask512 is matched on the widened 512-bit masked_load (result taken with
// extract_subvector at index 0); Mask128 is matched on the 128-bit
// masked_load. All four patterns route $mask through an i32 INSERT_SUBREG at
// subreg before copying it to VK1WM.
4302multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4303                                              AVX512VLVectorVTInfo _,
4304                                              dag Mask512, dag Mask128,
4305                                              RegisterClass MaskRC,
4306                                              SubRegIndex subreg> {
4307// AVX512F patterns.
// 512-bit load, all-zeros pass-through -> rmkz.
4308def : Pat<(_.info128.VT (extract_subvector
4309                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4310                                        _.info512.ImmAllZerosV)),
4311                           (iPTR 0))),
4312          (!cast<Instruction>(InstrStr#rmkz)
4313                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4314                      addr:$srcAddr)>;
4315
// 512-bit load, X86vzmovl'd register pass-through -> rmk.
4316def : Pat<(_.info128.VT (extract_subvector
4317                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4318                      (_.info512.VT (insert_subvector undef,
4319                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4320                            (iPTR 0))))),
4321                (iPTR 0))),
4322          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4323                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4324                      addr:$srcAddr)>;
4325
4326// AVX512VL patterns.
// 128-bit load, all-zeros pass-through -> rmkz.
4327def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4328                         _.info128.ImmAllZerosV)),
4329          (!cast<Instruction>(InstrStr#rmkz)
4330                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4331                      addr:$srcAddr)>;
4332
// 128-bit load, X86vzmovl'd register pass-through -> rmk.
4333def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4334                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4335          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4336                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4337                      addr:$srcAddr)>;
4338}
4339
// Masked register-move lowering for f32 (v4f32) and f64 (v2f64).
4340defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4341defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4342
// 512-bit masked-store lowering. Each mask dag ANDs a GPR with 1 and
// bitconverts the result to the mask type (v16i1 for f32, v8i1 for f64):
//   - f32, GR32 mask truncated to i16 (mask register copied directly);
//   - f32, GR16 mask (inserted at sub_16bit);
//   - f64, GR8 mask (inserted at sub_8bit).
4343defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4344                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4345defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4346                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4347defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4348                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4349
// f16 (VMOVSHZ) counterparts of the scalar move/store/load lowering, all
// guarded by HasFP16.
4350let Predicates = [HasFP16] in {
4351defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
// NOTE(review): the plain and the _subreg store instantiations below are
// given the same mask dag (GR32 AND 1, bitconverted to v32i1); they differ
// only in how $mask reaches VK1WM. The same holds for the two load
// instantiations further down.
4352defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4353                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4354defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4355                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
// GR8 mask: the 512-bit dag inserts the v8i1 mask at index 0 of an all-zeros
// v32i1; the 128-bit dag uses the v8i1 mask as is.
4356defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4357                   (v32i1 (insert_subvector
4358                           (v32i1 immAllZerosV),
4359                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4360                           (iPTR 0))),
4361                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4362                   GR8, sub_8bit>;
4363
// Load lowering with the same three mask shapes as the stores above.
4364defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4365                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4366defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4367                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4368defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4369                   (v32i1 (insert_subvector
4370                           (v32i1 immAllZerosV),
4371                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4372                           (iPTR 0))),
4373                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4374                   GR8, sub_8bit>;
4375
// Scalar f16 select between two registers -> VMOVSHZrrk. The false value
// $src2 is the pass-through, the first vector source is IMPLICIT_DEF, and the
// result is copied back to FR16X.
4376def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4377          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4378           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4379           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4380           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4381
// Scalar f16 select against fp16imm0 -> VMOVSHZrrkz.
4382def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4383          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4384           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4385}
4386
// Store lowering for GR8 masks (f32 and f64). In each instantiation the first
// dag is Mask512 and the second is Mask128.
// f32: the GR8 mask is ANDed with 1, bitconverted to v8i1 and its low v4i1
// extracted; Mask512 additionally inserts that v4i1 into an all-zeros v16i1.
4387defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4388                   (v16i1 (insert_subvector
4389                           (v16i1 immAllZerosV),
4390                           (v4i1 (extract_subvector
4391                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4392                                  (iPTR 0))),
4393                           (iPTR 0))),
4394                   (v4i1 (extract_subvector
4395                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4396                          (iPTR 0))), GR8, sub_8bit>;
// f64: the low v2i1 of the v8i1 mask is used; Mask512 inserts it into an
// all-zeros v16i1 and then extracts the low v8i1 of that.
4397defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4398                   (v8i1
4399                    (extract_subvector
4400                     (v16i1
4401                      (insert_subvector
4402                       (v16i1 immAllZerosV),
4403                       (v2i1 (extract_subvector
4404                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4405                              (iPTR 0))),
4406                       (iPTR 0))),
4407                     (iPTR 0))),
4408                   (v2i1 (extract_subvector
4409                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4410                          (iPTR 0))), GR8, sub_8bit>;
4411
// Load lowering for f32 and f64, using the same mask dags as the
// corresponding store instantiations.
// 512-bit masked loads: GR32 mask truncated to i16 (f32), GR16 mask (f32),
// GR8 mask (f64).
4412defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4413                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4414defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4415                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4416defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4417                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4418
// GR8 masks, f32: first dag is Mask512 (v4i1 inserted into an all-zeros
// v16i1), second is Mask128 (the v4i1 itself).
4419defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4420                   (v16i1 (insert_subvector
4421                           (v16i1 immAllZerosV),
4422                           (v4i1 (extract_subvector
4423                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4424                                  (iPTR 0))),
4425                           (iPTR 0))),
4426                   (v4i1 (extract_subvector
4427                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4428                          (iPTR 0))), GR8, sub_8bit>;
// GR8 masks, f64: Mask512 is the low v8i1 of an all-zeros v16i1 with the
// v2i1 mask inserted at index 0; Mask128 is the v2i1 itself.
4429defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4430                   (v8i1
4431                    (extract_subvector
4432                     (v16i1
4433                      (insert_subvector
4434                       (v16i1 immAllZerosV),
4435                       (v2i1 (extract_subvector
4436                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4437                              (iPTR 0))),
4438                       (iPTR 0))),
4439                     (iPTR 0))),
4440                   (v2i1 (extract_subvector
4441                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4442                          (iPTR 0))), GR8, sub_8bit>;
4443
// Scalar f32/f64 X86selects patterns. Register operands are copied from the
// scalar class (FR32X/FR64X) to VR128X, the masked vmovss/vmovsd record is
// emitted, and the result is copied back to the scalar class.
//
// f32, register/register: false value $src2 is the pass-through of rrk; the
// first vector source is IMPLICIT_DEF.
4444def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4445          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4446           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4447           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4448           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4449
// f32, register against fp32imm0 -> rrkz.
4450def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4451          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4452           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4453
// f32, loaded true value with register false value -> merge-masked load rmk.
4454def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4455          (COPY_TO_REGCLASS
4456           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4457                                                       VK1WM:$mask, addr:$src)),
4458           FR32X)>;
// f32, loaded true value against fp32imm0 -> zero-masked load rmkz.
4459def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4460          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4461
// f64 versions of the four patterns above, in the same order.
4462def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4463          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4464           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4465           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4466           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4467
4468def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4469          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4470           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4471
4472def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4473          (COPY_TO_REGCLASS
4474           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4475                                                       VK1WM:$mask, addr:$src)),
4476           FR64X)>;
4477def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4478          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4479
4480
// X86selects on whole v4f32/v2f64 values held in VR128X, selected directly
// as masked vmovss/vmovsd register moves. $src1 is passed as both vector
// sources of the move.
// Register false value: $src2 is the pass-through of rrk.
4481def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4482          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4483def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4484          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4485
// All-zeros false value -> rrkz.
4486def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4487          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4488def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4489          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4490
// _REV records: the register-to-register vmovsh/vmovss/vmovsd moves encoded
// with opcode 0x11 / MRMDestReg instead of the 0x10 / MRMSrcReg used by the
// rr/rrk/rrkz records. They have no selection patterns, are isCodeGenOnly
// with ForceDisassemble set, and each names its 0x10 counterpart through
// FoldGenData. The ".s" InstAliases after this group refer to these records.
4491let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
// f16 variants, additionally guarded by HasFP16.
4492  let Predicates = [HasFP16] in {
4493    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4494        (ins VR128X:$src1, VR128X:$src2),
4495        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4496        []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4497        FoldGenData<"VMOVSHZrr">,
4498        Sched<[SchedWriteFShuffle.XMM]>;
4499
// Merge-masked: pass-through $src0 is tied to $dst.
4500    let Constraints = "$src0 = $dst" in
4501    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4502        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4503         VR128X:$src1, VR128X:$src2),
4504        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4505          "$dst {${mask}}, $src1, $src2}",
4506        []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4507        FoldGenData<"VMOVSHZrrk">,
4508        Sched<[SchedWriteFShuffle.XMM]>;
4509
// Zero-masked.
4510    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4511        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4512        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4513          "$dst {${mask}} {z}, $src1, $src2}",
4514        []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4515        FoldGenData<"VMOVSHZrrkz">,
4516        Sched<[SchedWriteFShuffle.XMM]>;
4517  }
// f32 variants (XS prefix): plain, merge-masked, zero-masked.
4518  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4519                           (ins VR128X:$src1, VR128X:$src2),
4520                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4521                           []>, XS, EVEX_4V, VEX_LIG,
4522                           FoldGenData<"VMOVSSZrr">,
4523                           Sched<[SchedWriteFShuffle.XMM]>;
4524
4525  let Constraints = "$src0 = $dst" in
4526  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4527                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4528                                                   VR128X:$src1, VR128X:$src2),
4529                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4530                                        "$dst {${mask}}, $src1, $src2}",
4531                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4532                             FoldGenData<"VMOVSSZrrk">,
4533                             Sched<[SchedWriteFShuffle.XMM]>;
4534
4535  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4536                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4537                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4538                                    "$dst {${mask}} {z}, $src1, $src2}",
4539                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4540                         FoldGenData<"VMOVSSZrrkz">,
4541                         Sched<[SchedWriteFShuffle.XMM]>;
4542
// f64 variants (XD prefix, VEX_W): plain, merge-masked, zero-masked.
4543  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4544                           (ins VR128X:$src1, VR128X:$src2),
4545                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4546                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4547                           FoldGenData<"VMOVSDZrr">,
4548                           Sched<[SchedWriteFShuffle.XMM]>;
4549
4550  let Constraints = "$src0 = $dst" in
4551  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4552                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4553                                                   VR128X:$src1, VR128X:$src2),
4554                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4555                                        "$dst {${mask}}, $src1, $src2}",
4556                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4557                             VEX_W, FoldGenData<"VMOVSDZrrk">,
4558                             Sched<[SchedWriteFShuffle.XMM]>;
4559
4560  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4561                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4562                                                          VR128X:$src2),
4563                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4564                                         "$dst {${mask}} {z}, $src1, $src2}",
4565                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4566                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
4567                              Sched<[SchedWriteFShuffle.XMM]>;
4568}
4569
// Assembler aliases for the "<mnemonic>.s" spellings of vmovsh/vmovss/vmovsd.
// Each alias maps the unmasked, merge-masked ({k}) and zero-masked ({k}{z})
// syntax onto the corresponding *_REV instruction record. The masked aliases
// list only $dst, $mask, $src1 and $src2; the tied $src0 operand of the
// *rrk_REV records is not spelled out.
// NOTE(review): the trailing 0 is presumably InstAlias's "emit" priority,
// i.e. the alias is accepted by the parser but not used when printing --
// confirm against the InstAlias class definition.
4570def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4571                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4572def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4573                             "$dst {${mask}}, $src1, $src2}",
4574                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4575                                VR128X:$src1, VR128X:$src2), 0>;
4576def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4577                             "$dst {${mask}} {z}, $src1, $src2}",
4578                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4579                                 VR128X:$src1, VR128X:$src2), 0>;
4580def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4581                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4582def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4583                             "$dst {${mask}}, $src1, $src2}",
4584                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4585                                VR128X:$src1, VR128X:$src2), 0>;
4586def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4587                             "$dst {${mask}} {z}, $src1, $src2}",
4588                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4589                                 VR128X:$src1, VR128X:$src2), 0>;
4590def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4591                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4592def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4593                             "$dst {${mask}}, $src1, $src2}",
4594                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4595                                VR128X:$src1, VR128X:$src2), 0>;
4596def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4597                             "$dst {${mask}} {z}, $src1, $src2}",
4598                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4599                                 VR128X:$src1, VR128X:$src2), 0>;
4600
// Size-optimized selection of X86vzmovl on 32-bit-element vectors (float and
// integer): VMOVSSZrr with an AVX512_128_SET0 value as the first operand and
// the source as the second.
4601let Predicates = [HasAVX512, OptForSize] in {
4602  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4603            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4604  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4605            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4606
4607  // Move low f32 and clear high bits.
  // 256-bit forms: run the 128-bit instruction on the sub_xmm subregister of
  // the source and wrap the result in SUBREG_TO_REG.
4608  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4609            (SUBREG_TO_REG (i32 0),
4610             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4611              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4612  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4613            (SUBREG_TO_REG (i32 0),
4614             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4615              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4616
  // 512-bit forms, same construction as the 256-bit ones.
4617  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4618            (SUBREG_TO_REG (i32 0),
4619             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4620              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4621  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4622            (SUBREG_TO_REG (i32 0),
4623             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4624              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4625}
4626
4627// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4628// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Speed-optimized selection of 512-bit X86vzmovl on 32-bit elements: blend the
// low sub_xmm of the source into a V_SET0 value and wrap in SUBREG_TO_REG.
// The float form uses VBLENDPSrri with immediate 1, the integer form uses
// VPBLENDWrri with immediate 3.
// NOTE(review): immediate 3 presumably covers the two 16-bit words that make
// up the low 32-bit element -- confirm against the VPBLENDW definition.
4629let Predicates = [HasAVX512, OptForSpeed] in {
4630  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4631            (SUBREG_TO_REG (i32 0),
4632             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4633                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4634                          (i8 1))), sub_xmm)>;
4635  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4636            (SUBREG_TO_REG (i32 0),
4637             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4638                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4639                          (i8 3))), sub_xmm)>;
4640}
4641
// Scalar f32/f64 loads into vectors: scalar_to_vector of a load and the
// zero-extending X86vzload32/X86vzload64 nodes are selected as VMOVSSZrm /
// VMOVSDZrm. The wider result types wrap the 128-bit load in SUBREG_TO_REG.
4642let Predicates = [HasAVX512] in {
4643  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4644            (VMOVSSZrm addr:$src)>;
4645  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4646            (VMOVSDZrm addr:$src)>;
4647
4648  // Represent the same patterns above but in the form they appear for
4649  // 256-bit types
4650  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4651            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4652  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4653            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4654
4655  // Represent the same patterns above but in the form they appear for
4656  // 512-bit types
4657  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4658            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4659  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4660            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4661}
// Half-precision counterparts, gated on HasFP16: X86vzmovl is selected as
// VMOVSHZrr with an AVX512_128_SET0 first operand, and X86vzload16 as
// VMOVSHZrm. 256/512-bit types go through sub_xmm and SUBREG_TO_REG.
4662let Predicates = [HasFP16] in {
4663  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4664            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4665
4666  // FIXME we need better canonicalization in dag combine
4667  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4668            (SUBREG_TO_REG (i32 0),
4669             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4670              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4671  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4672            (SUBREG_TO_REG (i32 0),
4673             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4674              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4675
  // Zero-extending 16-bit loads for each vector width.
4676  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4677            (VMOVSHZrm addr:$src)>;
4678
4679  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4680            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4681
4682  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4683            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4684}
4685
// EVEX register-to-register vmovq (opcode 0x7E, MRMSrcReg) whose built-in
// pattern is X86vzmovl on v2i64. It is placed in the SSEPackedInt execution
// domain and scheduled as SchedWriteVecLogic.XMM.
4686let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4687def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4688                                (ins VR128X:$src),
4689                                "vmovq\t{$src, $dst|$dst, $src}",
4690                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4691                                                   (v2i64 VR128X:$src))))]>,
4692                                EVEX, VEX_W;
4693}
4694
// Zero-extending movd/movq selection patterns: GPR-to-vector moves, 32/64-bit
// zero-extending loads, and X86vzmovl on 64-bit-element vectors. Results wider
// than 128 bits are formed by wrapping the 128-bit instruction in
// SUBREG_TO_REG on sub_xmm.
4695let Predicates = [HasAVX512] in {
4696  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4697            (VMOVDI2PDIZrr GR32:$src)>;
4698
4699  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4700            (VMOV64toPQIZrr GR64:$src)>;
4701
4702  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4703  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4704            (VMOVDI2PDIZrm addr:$src)>;
4705  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4706            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  // The v2i64 form of this X86vzmovl is the built-in pattern of
  // VMOVZPQILo2PQIZrr; this adds the v2f64 form.
4707  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4708            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4709  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4710            (VMOVQI2PQIZrm addr:$src)>;
4711  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4712            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4713
4714  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4715  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4716            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4717  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4718            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4719
  // 256-bit X86vzmovl on 64-bit elements.
4720  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4721            (SUBREG_TO_REG (i32 0),
4722             (v2f64 (VMOVZPQILo2PQIZrr
4723                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4724             sub_xmm)>;
4725  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4726            (SUBREG_TO_REG (i32 0),
4727             (v2i64 (VMOVZPQILo2PQIZrr
4728                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4729             sub_xmm)>;
4730
  // 512-bit X86vzmovl on 64-bit elements.
4731  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4732            (SUBREG_TO_REG (i32 0),
4733             (v2f64 (VMOVZPQILo2PQIZrr
4734                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4735             sub_xmm)>;
4736  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4737            (SUBREG_TO_REG (i32 0),
4738             (v2i64 (VMOVZPQILo2PQIZrr
4739                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4740             sub_xmm)>;
4741}
4742
4743//===----------------------------------------------------------------------===//
4744// AVX-512 - Non-temporals
4745//===----------------------------------------------------------------------===//
4746
// 512-bit EVEX vmovntdqa load (opcode 0x2A, MRMSrcMem). The record itself has
// an empty pattern list; the alignednontemporalload Pat<> records in this
// section select it.
// Scheduled with the non-temporal move class SchedWriteVecMoveLSNT -- the same
// class the VMOVNTDQ stores in this section use -- rather than the ordinary
// SchedWriteVecMoveLS class, so the load is modeled as a non-temporal load.
4747def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4748                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4749                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.ZMM.RM]>,
4750                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4751
// 256-bit and 128-bit EVEX vmovntdqa loads, available only with HasVLX. Both
// records have empty pattern lists; the alignednontemporalload Pat<> records
// in this section select them.
// Scheduled with the non-temporal move class SchedWriteVecMoveLSNT -- the same
// class the VMOVNTDQ stores in this section use -- rather than the ordinary
// SchedWriteVecMoveLS class.
4752let Predicates = [HasVLX] in {
4753  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4754                       (ins i256mem:$src),
4755                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4756                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.YMM.RM]>,
4757                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4758
4759  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4760                      (ins i128mem:$src),
4761                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4762                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.XMM.RM]>,
4763                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4764}
4765
// Defines the single "mr" (store to memory) record of a non-temporal store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand string is appended here.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   Sched     - move load/store scheduling info; only its MR member is used.
//   st_frag   - store fragment used in the pattern (defaults to
//               alignednontemporalstore).
// AddedComplexity = 400 raises the pattern's priority during selection.
4766multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4767                        X86SchedWriteMoveLS Sched,
4768                        PatFrag st_frag = alignednontemporalstore> {
4769  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4770  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4771                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4772                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4773                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4774}
4775
// Instantiates avx512_movnt for all three vector lengths: Z (512-bit, needs
// HasAVX512) and Z256/Z128 (need HasAVX512 and HasVLX). Each width takes the
// matching info512/info256/info128 and Sched.ZMM/YMM/XMM member.
4776multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4777                           AVX512VLVectorVTInfo VTInfo,
4778                           X86SchedWriteMoveLSWidths Sched> {
4779  let Predicates = [HasAVX512] in
4780    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4781
4782  let Predicates = [HasAVX512, HasVLX] in {
4783    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4784    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4785  }
4786}
4787
// Non-temporal store instructions. Each defm yields <NAME>Zmr, <NAME>Z256mr
// and <NAME>Z128mr with a built-in alignednontemporalstore pattern on the
// given element type (i64, f64, f32). VMOVNTPD and VMOVNTPS share opcode 0x2B
// and differ in prefix (PD + VEX_W vs. PS).
4788defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4789                                SchedWriteVecMoveLSNT>, PD;
4790defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4791                                SchedWriteFMoveLSNT>, PD, VEX_W;
4792defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4793                                SchedWriteFMoveLSNT>, PS;
4794
// 512-bit non-temporal patterns for the types not covered by the instruction
// definitions themselves. Stores of v16i32/v32i16/v64i8 reuse VMOVNTDQZmr
// (whose built-in pattern is on the i64 element type); loads of every 512-bit
// vector type, float or integer, use VMOVNTDQAZrm.
4795let Predicates = [HasAVX512], AddedComplexity = 400 in {
4796  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4797            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4798  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4799            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4800  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4801            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4802
4803  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4804            (VMOVNTDQAZrm addr:$src)>;
4805  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4806            (VMOVNTDQAZrm addr:$src)>;
4807  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4808            (VMOVNTDQAZrm addr:$src)>;
4809  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4810            (VMOVNTDQAZrm addr:$src)>;
4811  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4812            (VMOVNTDQAZrm addr:$src)>;
4813  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4814            (VMOVNTDQAZrm addr:$src)>;
4815}
4816
// 256-bit and 128-bit non-temporal patterns (HasVLX), mirroring the 512-bit
// set: i32/i16/i8 stores reuse VMOVNTDQZ256mr / VMOVNTDQZ128mr, and loads of
// every vector type of that width use VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
4817let Predicates = [HasVLX], AddedComplexity = 400 in {
4818  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4819            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4820  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4821            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4822  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4823            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4824
4825  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4826            (VMOVNTDQAZ256rm addr:$src)>;
4827  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4828            (VMOVNTDQAZ256rm addr:$src)>;
4829  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4830            (VMOVNTDQAZ256rm addr:$src)>;
4831  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4832            (VMOVNTDQAZ256rm addr:$src)>;
4833  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4834            (VMOVNTDQAZ256rm addr:$src)>;
4835  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4836            (VMOVNTDQAZ256rm addr:$src)>;
4837
4838  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4839            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4840  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4841            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4842  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4843            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4844
4845  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4846            (VMOVNTDQAZ128rm addr:$src)>;
4847  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4848            (VMOVNTDQAZ128rm addr:$src)>;
4849  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4850            (VMOVNTDQAZ128rm addr:$src)>;
4851  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4852            (VMOVNTDQAZ128rm addr:$src)>;
4853  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4854            (VMOVNTDQAZ128rm addr:$src)>;
4855  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4856            (VMOVNTDQAZ128rm addr:$src)>;
4857}
4858
4859//===----------------------------------------------------------------------===//
4860// AVX-512 - Integer arithmetic
4861//
// Two-operand integer op in register-register (rr) and register-memory (rm)
// form, both built through AVX512_maskable.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode matched by the patterns.
//   _            - vector type info (RC, VT, MemOp, LdFrag).
//   sched        - scheduling info; rm uses sched.Folded / sched.ReadAfterFold.
//   IsCommutable - forwarded (twice) to the rr form only; the rm form leaves
//                  AVX512_maskable's corresponding arguments at their defaults.
4862multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4863                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4864                           bit IsCommutable = 0> {
4865  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4866                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4867                    "$src2, $src1", "$src1, $src2",
4868                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4869                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4870                    Sched<[sched]>;
4871
4872  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4873                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4874                  "$src2, $src1", "$src1, $src2",
4875                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4876                  AVX512BIBase, EVEX_4V,
4877                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4878}
4879
// Extends avx512_binop_rm (rr + rm) with an rmb form whose second operand is a
// broadcast scalar memory operand: it uses _.ScalarMemOp, appends
// _.BroadcastStr to the operand in the assembly string, matches
// _.BroadcastLdFrag, and sets EVEX_B.
4880multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4881                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4882                            bit IsCommutable = 0> :
4883           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4884  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4885                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4886                  "${src2}"#_.BroadcastStr#", $src1",
4887                  "$src1, ${src2}"#_.BroadcastStr,
4888                  (_.VT (OpNode _.RC:$src1,
4889                                (_.BroadcastLdFrag addr:$src2)))>,
4890                  AVX512BIBase, EVEX_4V, EVEX_B,
4891                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4892}
4893
// Instantiates avx512_binop_rm (no broadcast form) at all three vector
// lengths. Z requires `prd`; Z256 and Z128 additionally require HasVLX.
4894multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4895                              AVX512VLVectorVTInfo VTInfo,
4896                              X86SchedWriteWidths sched, Predicate prd,
4897                              bit IsCommutable = 0> {
4898  let Predicates = [prd] in
4899    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4900                             IsCommutable>, EVEX_V512;
4901
4902  let Predicates = [prd, HasVLX] in {
4903    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4904                                sched.YMM, IsCommutable>, EVEX_V256;
4905    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4906                                sched.XMM, IsCommutable>, EVEX_V128;
4907  }
4908}
4909
// Instantiates avx512_binop_rmb (rr, rm and broadcast rmb) at all three vector
// lengths. Z requires `prd`; Z256 and Z128 additionally require HasVLX.
4910multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4911                               AVX512VLVectorVTInfo VTInfo,
4912                               X86SchedWriteWidths sched, Predicate prd,
4913                               bit IsCommutable = 0> {
4914  let Predicates = [prd] in
4915    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4916                             IsCommutable>, EVEX_V512;
4917
4918  let Predicates = [prd, HasVLX] in {
4919    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4920                                 sched.YMM, IsCommutable>, EVEX_V256;
4921    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4922                                 sched.XMM, IsCommutable>, EVEX_V128;
4923  }
4924}
4925
// 64-bit-element wrapper: avx512_binop_rmb_vl on avx512vl_i64_info with VEX_W
// and EVEX_CD8<64, CD8VF>. Includes the broadcast form.
4926multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4927                                X86SchedWriteWidths sched, Predicate prd,
4928                                bit IsCommutable = 0> {
4929  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4930                                  sched, prd, IsCommutable>,
4931                                  VEX_W, EVEX_CD8<64, CD8VF>;
4932}
4933
// 32-bit-element wrapper: avx512_binop_rmb_vl on avx512vl_i32_info with
// EVEX_CD8<32, CD8VF>. Includes the broadcast form.
4934multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4935                                X86SchedWriteWidths sched, Predicate prd,
4936                                bit IsCommutable = 0> {
4937  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4938                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4939}
4940
// 16-bit-element wrapper: avx512_binop_rm_vl (no broadcast form) on
// avx512vl_i16_info with EVEX_CD8<16, CD8VF> and VEX_WIG.
4941multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4942                                X86SchedWriteWidths sched, Predicate prd,
4943                                bit IsCommutable = 0> {
4944  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4945                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4946                                 VEX_WIG;
4947}
4948
// 8-bit-element wrapper: avx512_binop_rm_vl (no broadcast form) on
// avx512vl_i8_info with EVEX_CD8<8, CD8VF> and VEX_WIG.
4949multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4950                                X86SchedWriteWidths sched, Predicate prd,
4951                                bit IsCommutable = 0> {
4952  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4953                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4954                                 VEX_WIG;
4955}
4956
// Defines the Q (64-bit) and D (32-bit) element variants of one operation.
// The mnemonic gets a "q" / "d" suffix. Note the parameter order: opc_d comes
// before opc_q.
4957multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4958                                 SDNode OpNode, X86SchedWriteWidths sched,
4959                                 Predicate prd, bit IsCommutable = 0> {
4960  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4961                                   IsCommutable>;
4962
4963  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4964                                   IsCommutable>;
4965}
4966
// Defines the W (16-bit) and B (8-bit) element variants of one operation.
// The mnemonic gets a "w" / "b" suffix. Note the parameter order: opc_b comes
// before opc_w.
4967multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4968                                 SDNode OpNode, X86SchedWriteWidths sched,
4969                                 Predicate prd, bit IsCommutable = 0> {
4970  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4971                                   IsCommutable>;
4972
4973  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4974                                   IsCommutable>;
4975}
4976
// Defines all four element widths of one operation. Opcodes are given in the
// order b, w, d, q. The D/Q variants are predicated on HasAVX512, the B/W
// variants on HasBWI.
4977multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4978                                  bits<8> opc_d, bits<8> opc_q,
4979                                  string OpcodeStr, SDNode OpNode,
4980                                  X86SchedWriteWidths sched,
4981                                  bit IsCommutable = 0> {
4982  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4983                                    sched, HasAVX512, IsCommutable>,
4984              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4985                                    sched, HasBWI, IsCommutable>;
4986}
4987
// Binary op whose source and destination vector types may differ.
//   _Src   - type info for both source operands (RC, VT, MemOp, LdFrag).
//   _Dst   - type info for the result and for AVX512_maskable.
//   _Brdct - type info for the broadcast memory form (ScalarMemOp,
//            BroadcastStr, BroadcastLdFrag); the broadcast value is
//            bitconverted before being passed to OpNode.
// Defines rr, rm and rmb. IsCommutable is forwarded (once) to rr only.
4988multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4989                            X86FoldableSchedWrite sched,
4990                            SDNode OpNode,X86VectorVTInfo _Src,
4991                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4992                            bit IsCommutable = 0> {
4993  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4994                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4995                            "$src2, $src1","$src1, $src2",
4996                            (_Dst.VT (OpNode
4997                                         (_Src.VT _Src.RC:$src1),
4998                                         (_Src.VT _Src.RC:$src2))),
4999                            IsCommutable>,
5000                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
5001  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5002                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5003                        "$src2, $src1", "$src1, $src2",
5004                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5005                                      (_Src.LdFrag addr:$src2)))>,
5006                        AVX512BIBase, EVEX_4V,
5007                        Sched<[sched.Folded, sched.ReadAfterFold]>;
5008
5009  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5010                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5011                    OpcodeStr,
5012                    "${src2}"#_Brdct.BroadcastStr#", $src1",
5013                     "$src1, ${src2}"#_Brdct.BroadcastStr,
5014                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5015                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5016                    AVX512BIBase, EVEX_4V, EVEX_B,
5017                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5018}
5019
// Integer add/sub/saturating/multiply/average instantiations.
// For avx512_binop_rm_vl_all the four opcodes are in b, w, d, q order; for
// avx512_binop_rm_vl_bw they are in b, w order. The final 1/0 argument is
// IsCommutable. The single-width forms (_d/_w/_q) name their feature predicate
// explicitly.
5020defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5021                                    SchedWriteVecALU, 1>;
5022defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5023                                    SchedWriteVecALU, 0>;
5024defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5025                                    SchedWriteVecALU, HasBWI, 1>;
5026defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5027                                    SchedWriteVecALU, HasBWI, 0>;
5028defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5029                                     SchedWriteVecALU, HasBWI, 1>;
5030defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5031                                     SchedWriteVecALU, HasBWI, 0>;
5032defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5033                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
5034defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5035                                    SchedWriteVecIMul, HasBWI, 1>;
5036defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5037                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
5038                                    NotEVEX2VEXConvertible;
5039defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5040                                    HasBWI, 1>;
5041defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5042                                     HasBWI, 1>;
5043defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5044                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
5045defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
5046                                   SchedWriteVecALU, HasBWI, 1>;
5047defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5048                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5049defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5050                                     SchedWriteVecIMul, HasAVX512, 1>;
5051
// Instantiates avx512_binop_rm2 at all three vector lengths with separate
// source and destination type families. Regardless of _SrcVTInfo, the
// broadcast form always uses the 64-bit element infos (v8i64_info,
// v4i64x_info, v2i64x_info), and every width is tagged EVEX_CD8<64, CD8VF>
// and VEX_W. Z requires `prd`; Z256/Z128 additionally require HasVLX.
5052multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5053                            X86SchedWriteWidths sched,
5054                            AVX512VLVectorVTInfo _SrcVTInfo,
5055                            AVX512VLVectorVTInfo _DstVTInfo,
5056                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
5057  let Predicates = [prd] in
5058    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5059                                 _SrcVTInfo.info512, _DstVTInfo.info512,
5060                                 v8i64_info, IsCommutable>,
5061                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
5062  let Predicates = [HasVLX, prd] in {
5063    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5064                                      _SrcVTInfo.info256, _DstVTInfo.info256,
5065                                      v4i64x_info, IsCommutable>,
5066                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
5067    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5068                                      _SrcVTInfo.info128, _DstVTInfo.info128,
5069                                      v2i64x_info, IsCommutable>,
5070                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
5071  }
5072}
5073
// vpmultishiftqb: i8 source and destination vectors, matched on X86multishift,
// gated on HasVBMI, not commutable. The broadcast form uses 64-bit elements
// (fixed inside avx512_binop_all).
5074defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5075                                avx512vl_i8_info, avx512vl_i8_info,
5076                                X86multishift, HasVBMI, 0>, T8PD;
5077
// Broadcast-memory (rmb) form of a pack-style op: the second operand is a
// scalar memory operand of the source element type, broadcast via
// _Src.BroadcastLdFrag and bitconverted; the result has type _Dst.VT.
// Sets EVEX_B and EVEX_CD8 from the source element size.
5078multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5079                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5080                            X86FoldableSchedWrite sched> {
5081  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5082                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5083                    OpcodeStr,
5084                    "${src2}"#_Src.BroadcastStr#", $src1",
5085                     "$src1, ${src2}"#_Src.BroadcastStr,
5086                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5087                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5088                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5089                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5090}
5091
// Register-register (rr) and register-memory (rm) forms of an op that takes
// two _Src-typed operands and produces a _Dst-typed result. EVEX_CD8 is
// derived from the source element size. IsCommutable is forwarded (twice) to
// the rr form only. No opcode-map/prefix class is applied here; the
// instantiating defm supplies it.
5092multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5093                            SDNode OpNode,X86VectorVTInfo _Src,
5094                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5095                            bit IsCommutable = 0> {
5096  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5097                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5098                            "$src2, $src1","$src1, $src2",
5099                            (_Dst.VT (OpNode
5100                                         (_Src.VT _Src.RC:$src1),
5101                                         (_Src.VT _Src.RC:$src2))),
5102                            IsCommutable, IsCommutable>,
5103                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5104  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5105                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5106                        "$src2, $src1", "$src1, $src2",
5107                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5108                                      (_Src.LdFrag addr:$src2)))>,
5109                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5110                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5111}
5112
// i32 -> i16 pack at all three vector lengths. Each width combines
// avx512_packs_rm (rr, rm) with avx512_packs_rmb (broadcast), scheduled as
// SchedWriteShuffle. Z requires HasBWI; Z256/Z128 additionally require HasVLX.
5113multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5114                                    SDNode OpNode> {
5115  let Predicates = [HasBWI] in
5116  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5117                                 v32i16_info, SchedWriteShuffle.ZMM>,
5118                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5119                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5120  let Predicates = [HasBWI, HasVLX] in {
5121    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5122                                     v16i16x_info, SchedWriteShuffle.YMM>,
5123                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5124                                      v16i16x_info, SchedWriteShuffle.YMM>,
5125                                      EVEX_V256;
5126    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5127                                     v8i16x_info, SchedWriteShuffle.XMM>,
5128                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5129                                      v8i16x_info, SchedWriteShuffle.XMM>,
5130                                      EVEX_V128;
5131  }
5132}
// i16 -> i8 pack at all three vector lengths. Only avx512_packs_rm (rr, rm) is
// used -- there is no broadcast form -- and every width is tagged VEX_WIG.
// Z requires HasBWI; Z256/Z128 additionally require HasVLX.
5133multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5134                            SDNode OpNode> {
5135  let Predicates = [HasBWI] in
5136  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5137                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
5138  let Predicates = [HasBWI, HasVLX] in {
5139    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5140                                     v32i8x_info, SchedWriteShuffle.YMM>,
5141                                     EVEX_V256, VEX_WIG;
5142    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5143                                     v16i8x_info, SchedWriteShuffle.XMM>,
5144                                     EVEX_V128, VEX_WIG;
5145  }
5146}
5147
// Instantiates a multiply-add style instruction whose source and destination
// element types differ, at all three vector widths.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SelectionDAG node used in the generated patterns.
//   _Src / _Dst  - per-width type info for the source and result vectors; the
//                  info512/info256/info128 members are picked per record.
//   IsCommutable - forwarded unchanged to avx512_packs_rm (defaults to 0).
// Reuses avx512_packs_rm but schedules on SchedWriteVecIMul rather than the
// shuffle classes used by the pack instructions.
5148multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5149                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
5150                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  // 512-bit record; gated on BWI only.
5151  let Predicates = [HasBWI] in
5152  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5153                                _Dst.info512, SchedWriteVecIMul.ZMM,
5154                                IsCommutable>, EVEX_V512;
  // 256-bit and 128-bit records additionally require VLX.
5155  let Predicates = [HasBWI, HasVLX] in {
5156    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5157                                     _Dst.info256, SchedWriteVecIMul.YMM,
5158                                     IsCommutable>, EVEX_V256;
5159    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5160                                     _Dst.info128, SchedWriteVecIMul.XMM,
5161                                     IsCommutable>, EVEX_V128;
5162  }
5163}
5164
// Pack instructions: X86Packss / X86Packus nodes, dword->word and word->byte.
// NOTE(review): VPACKUSDW uses AVX5128IBase (digit '8') while the other three
// use AVX512BIBase (letter 'B'). These are different names; presumably both are
// format classes defined elsewhere and the difference is intentional -- do not
// "fix" it as a typo without checking the format definitions.
5165defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5166defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5167defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5168defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5169
// Multiply-add instructions built on avx512_vpmadd.
// VPMADDUBSW: i8 sources -> i16 result, left at the default IsCommutable = 0.
5170defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5171                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
// VPMADDWD: i16 sources -> i32 result, explicitly marked commutable (1).
5172defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5173                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
5174
// Integer min/max instructions for byte, word, dword and qword elements, mapped
// onto the generic smax/umax/smin/umin nodes. The byte and word forms are gated
// on HasBWI, the dword and qword forms on HasAVX512. Every qword form is also
// tagged NotEVEX2VEXConvertible.
// NOTE(review): the trailing '1' argument is presumably the IsCommutable flag
// of avx512_binop_rm_vl_*; those multiclasses are defined outside this
// excerpt -- confirm.

// Signed maximum.
5175defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5176                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5177defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5178                                    SchedWriteVecALU, HasBWI, 1>;
5179defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5180                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5181defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5182                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5183                                    NotEVEX2VEXConvertible;
5184
// Unsigned maximum.
5185defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5186                                    SchedWriteVecALU, HasBWI, 1>;
5187defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5188                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5189defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5190                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5191defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5192                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5193                                    NotEVEX2VEXConvertible;
5194
// Signed minimum.
5195defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5196                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5197defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5198                                    SchedWriteVecALU, HasBWI, 1>;
5199defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5200                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5201defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5202                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5203                                    NotEVEX2VEXConvertible;
5204
// Unsigned minimum.
5205defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5206                                    SchedWriteVecALU, HasBWI, 1>;
5207defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5208                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5209defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5210                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5211defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5212                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5213                                    NotEVEX2VEXConvertible;
5214
5215// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each register operand is placed in the low part of an otherwise undefined
// v8i64 register (INSERT_SUBREG into IMPLICIT_DEF), the 512-bit VPMULLQZ
// instruction is used, and only the low sub_ymm / sub_xmm part of the result
// is extracted, so the lanes computed from undefined data are discarded.
5216let Predicates = [HasDQI, NoVLX] in {
  // v4i64, register * register.
5217  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5218            (EXTRACT_SUBREG
5219                (VPMULLQZrr
5220                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5221                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5222             sub_ymm)>;
  // v4i64, register * 64-bit broadcast load; the address is passed straight to
  // the rmb form.
5223  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5224            (EXTRACT_SUBREG
5225                (VPMULLQZrmb
5226                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5227                    addr:$src2),
5228             sub_ymm)>;
5229
  // v2i64, register * register.
5230  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5231            (EXTRACT_SUBREG
5232                (VPMULLQZrr
5233                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5234                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5235             sub_xmm)>;
  // v2i64, register * 64-bit broadcast load.
5236  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5237            (EXTRACT_SUBREG
5238                (VPMULLQZrmb
5239                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5240                    addr:$src2),
5241             sub_xmm)>;
5242}
5243
// Emits patterns that implement a v4i64 / v2i64 operation with a 512-bit
// instruction.
//   Instr  - record-name prefix of the 512-bit instruction; "rr" and "rmb" are
//            appended and the result is looked up with !cast<Instruction>.
//   OpNode - SelectionDAG node to match.
// Register operands are inserted into an undefined v8i64 register, the wide
// instruction is used, and the low sub_ymm / sub_xmm part of the result is
// extracted. No predicates are set here; the instantiation site supplies them.
5244multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // v4i64, register-register.
5245  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5246            (EXTRACT_SUBREG
5247                (!cast<Instruction>(Instr#"rr")
5248                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5249                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5250             sub_ymm)>;
  // v4i64, register with a 64-bit broadcast load.
5251  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5252            (EXTRACT_SUBREG
5253                (!cast<Instruction>(Instr#"rmb")
5254                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5255                    addr:$src2),
5256             sub_ymm)>;
5257
  // v2i64, register-register.
5258  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5259            (EXTRACT_SUBREG
5260                (!cast<Instruction>(Instr#"rr")
5261                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5262                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5263             sub_xmm)>;
  // v2i64, register with a 64-bit broadcast load.
5264  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5265            (EXTRACT_SUBREG
5266                (!cast<Instruction>(Instr#"rmb")
5267                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5268                    addr:$src2),
5269             sub_xmm)>;
5270}
5271
// Without VLX, lower 128/256-bit 64-bit-element min/max onto the 512-bit
// VPMAXUQZ / VPMINUQZ / VPMAXSQZ / VPMINSQZ instructions.
5272let Predicates = [HasAVX512, NoVLX] in {
5273  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5274  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5275  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5276  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5277}
5278
5279//===----------------------------------------------------------------------===//
5280// AVX-512  Logical Instructions
5281//===----------------------------------------------------------------------===//
5282
// Bitwise logic instructions. Each defm passes the same opcode twice to
// avx512_binop_rm_vl_dq.
// NOTE(review): the two opcodes presumably belong to the D and Q element
// variants (records such as VPANDQZ128rr are referenced below) -- confirm.
// VPAND / VPOR / VPXOR pass a trailing '1'; VPANDN omits it.
// NOTE(review): that argument is presumably IsCommutable -- confirm against
// the avx512_binop_rm_vl_dq definition.
5283defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5284                                   SchedWriteVecLogic, HasAVX512, 1>;
5285defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5286                                  SchedWriteVecLogic, HasAVX512, 1>;
5287defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5288                                   SchedWriteVecLogic, HasAVX512, 1>;
5289defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5290                                    SchedWriteVecLogic, HasAVX512>;
5291
// With VLX, select the Q-element 128/256-bit logic instructions for byte and
// word element vectors. These patterns are unmasked, and a bitwise operation
// gives the same bits whatever the element width, so the Q form can stand in
// for v16i8/v8i16 (128-bit) and v32i8/v16i16 (256-bit) operations.
5292let Predicates = [HasVLX] in {
  // 128-bit, register-register.
5293  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5294            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5295  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5296            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5297
5298  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5299            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5300  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5301            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5302
5303  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5304            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5305  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5306            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5307
5308  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5309            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5310  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5311            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5312
  // 128-bit, register-memory: the second operand is a full-vector load.
5313  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5314            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5315  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5316            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5317
5318  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5319            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5320  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5321            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5322
5323  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5324            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5325  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5326            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5327
5328  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5329            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5330  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5331            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5332
  // 256-bit, register-register.
5333  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5334            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5335  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5336            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5337
5338  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5339            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5340  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5341            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5342
5343  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5344            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5345  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5346            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5347
5348  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5349            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5350  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5351            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5352
  // 256-bit, register-memory.
5353  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5354            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5355  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5356            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5357
5358  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5359            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5360  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5361            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5362
5363  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5364            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5365  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5366            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5367
5368  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5369            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5370  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5371            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5372}
5373
// 512-bit counterpart of the byte/word logic patterns: v64i8 and v32i16
// and/or/xor/andnp are selected to the Q-element 512-bit instructions. Only
// HasAVX512 is required here (no BWI predicate is listed).
5374let Predicates = [HasAVX512] in {
  // Register-register.
5375  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5376            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5377  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5378            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5379
5380  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5381            (VPORQZrr VR512:$src1, VR512:$src2)>;
5382  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5383            (VPORQZrr VR512:$src1, VR512:$src2)>;
5384
5385  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5386            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5387  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5388            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5389
5390  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5391            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5392  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5393            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5394
  // Register-memory: the second operand is a full-vector load.
5395  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5396            (VPANDQZrm VR512:$src1, addr:$src2)>;
5397  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5398            (VPANDQZrm VR512:$src1, addr:$src2)>;
5399
5400  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5401            (VPORQZrm VR512:$src1, addr:$src2)>;
5402  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5403            (VPORQZrm VR512:$src1, addr:$src2)>;
5404
5405  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5406            (VPXORQZrm VR512:$src1, addr:$src2)>;
5407  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5408            (VPXORQZrm VR512:$src1, addr:$src2)>;
5409
5410  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5411            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5412  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5413            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5414}
5415
5416// Patterns to catch vselect with different type than logic op.
//   InstrStr - record-name prefix; rrk/rrkz/rmk/rmkz are pasted onto it and
//              the result is looked up with !cast<Instruction>.
//   OpNode   - the logic SelectionDAG node.
//   _        - type info of the vselect_mask (result type, mask class, RC).
//   IntInfo  - type info of the logic operation, which is bitconverted to
//              _.VT before being selected.
// The element width of the masked instruction follows the vselect type (_),
// not the type the logic op was performed in.
5417multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5418                                    X86VectorVTInfo _,
5419                                    X86VectorVTInfo IntInfo> {
5420  // Masked register-register logical operations.
  // Merge-masking: elements not selected by $mask come from $src0.
5421  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5422                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5423                   _.RC:$src0)),
5424            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5425             _.RC:$src1, _.RC:$src2)>;
5426
  // Zero-masking: the false operand of the select is the all-zeros vector.
5427  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5428                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5429                   _.ImmAllZerosV)),
5430            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5431             _.RC:$src2)>;
5432
5433  // Masked register-memory logical operations.
  // Same two masking flavours, with the second logic operand loaded from
  // memory.
5434  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5435                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5436                                            (load addr:$src2)))),
5437                   _.RC:$src0)),
5438            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5439             _.RC:$src1, addr:$src2)>;
5440  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5441                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5442                                            (load addr:$src2)))),
5443                   _.ImmAllZerosV)),
5444            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5445             addr:$src2)>;
5446}
5447
// Broadcast-memory counterpart of avx512_logical_lowering: matches a
// vselect_mask of type _.VT over a bitconverted logic op whose second operand
// is IntInfo.BroadcastLdFrag, and selects the rmbk (merge-masked) or rmbkz
// (zero-masked) record derived from InstrStr.
//   InstrStr - record-name prefix for the !cast<Instruction> lookup.
//   OpNode   - the logic SelectionDAG node.
//   _        - type info of the vselect_mask.
//   IntInfo  - type info of the logic op and of the broadcast load.
5448multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5449                                         X86VectorVTInfo _,
5450                                         X86VectorVTInfo IntInfo> {
5451  // Register-broadcast logical operations.
  // Merge-masking: unselected elements come from $src0.
5452  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5453                   (bitconvert
5454                    (IntInfo.VT (OpNode _.RC:$src1,
5455                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5456                   _.RC:$src0)),
5457            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5458             _.RC:$src1, addr:$src2)>;
  // Zero-masking: the false operand is the all-zeros vector.
5459  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5460                   (bitconvert
5461                    (IntInfo.VT (OpNode _.RC:$src1,
5462                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5463                   _.ImmAllZerosV)),
5464            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5465             _.RC:$src1, addr:$src2)>;
5466}
5467
// Expands avx512_logical_lowering over the three vector widths by appending
// "Z128", "Z256" or "Z" to InstrStr and selecting the matching info128 /
// info256 / info512 member of both type-info bundles.
//   SelectInfo - per-width type info of the vselect.
//   IntInfo    - per-width type info of the logic op.
5468multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5469                                         AVX512VLVectorVTInfo SelectInfo,
5470                                         AVX512VLVectorVTInfo IntInfo> {
// 128-bit and 256-bit patterns require VLX.
5471let Predicates = [HasVLX] in {
5472  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5473                                 IntInfo.info128>;
5474  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5475                                 IntInfo.info256>;
5476}
// 512-bit patterns require only AVX512.
5477let Predicates = [HasAVX512] in {
5478  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5479                                 IntInfo.info512>;
5480}
5481}
5482
// Expands avx512_logical_lowering_bcast over the three vector widths, using
// the same suffix and predicate scheme as avx512_logical_lowering_sizes.
//   SelectInfo - per-width type info of the vselect.
//   IntInfo    - per-width type info of the logic op / broadcast load.
5483multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5484                                               AVX512VLVectorVTInfo SelectInfo,
5485                                               AVX512VLVectorVTInfo IntInfo> {
// 128-bit and 256-bit patterns require VLX.
5486let Predicates = [HasVLX] in {
5487  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5488                                       SelectInfo.info128, IntInfo.info128>;
5489  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5490                                       SelectInfo.info256, IntInfo.info256>;
5491}
// 512-bit patterns require only AVX512.
5492let Predicates = [HasAVX512] in {
5493  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5494                                       SelectInfo.info512, IntInfo.info512>;
5495}
5496}
5497
// Instantiates the mismatched-type masked-logic patterns for one logic
// operation across every (vselect type, logic-op type) pairing listed below.
//   InstrStr - mnemonic prefix without element suffix (e.g. "VPAND"); "Q" is
//              appended for 64-bit-element selects and "D" for 32-bit ones.
//   OpNode   - the logic SelectionDAG node.
// Integer selects skip the pairing where both types are equal; the FP selects
// (f32/f64) are paired with all four integer element types.
5498multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5499  // i64 vselect with i32/i16/i8 logic op
5500  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5501                                       avx512vl_i32_info>;
5502  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5503                                       avx512vl_i16_info>;
5504  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5505                                       avx512vl_i8_info>;
5506
5507  // i32 vselect with i64/i16/i8 logic op
5508  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5509                                       avx512vl_i64_info>;
5510  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5511                                       avx512vl_i16_info>;
5512  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5513                                       avx512vl_i8_info>;
5514
5515  // f32 vselect with i64/i32/i16/i8 logic op
5516  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5517                                       avx512vl_i64_info>;
5518  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5519                                       avx512vl_i32_info>;
5520  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5521                                       avx512vl_i16_info>;
5522  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5523                                       avx512vl_i8_info>;
5524
5525  // f64 vselect with i64/i32/i16/i8 logic op
5526  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5527                                       avx512vl_i64_info>;
5528  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5529                                       avx512vl_i32_info>;
5530  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5531                                       avx512vl_i16_info>;
5532  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5533                                       avx512vl_i8_info>;
5534
  // Broadcast-load forms: only the FP select with the same-width integer
  // logic op (f32 with i32, f64 with i64) is instantiated.
5535  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5536                                             avx512vl_f32_info,
5537                                             avx512vl_i32_info>;
5538  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5539                                             avx512vl_f64_info,
5540                                             avx512vl_i64_info>;
5541}
5542
// Emit the mismatched-type masked-logic patterns for each of the four logic
// operations.
5543defm : avx512_logical_lowering_types<"VPAND", and>;
5544defm : avx512_logical_lowering_types<"VPOR",  or>;
5545defm : avx512_logical_lowering_types<"VPXOR", xor>;
5546defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5547
5548//===----------------------------------------------------------------------===//
5549// AVX-512  FP arithmetic
5550//===----------------------------------------------------------------------===//
5551
// Defines the non-rounding forms of a scalar FP binary instruction.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   _            - type info of the scalar type.
//   OpNode       - node matched on the scalar register class (_.FRC) by the
//                  codegen-only rr/rm records.
//   VecNode      - node matched on the vector register class (_.RC) by the
//                  maskable rr_Int/rm_Int records.
//   sched        - scheduling class; memory forms use sched.Folded together
//                  with sched.ReadAfterFold.
//   IsCommutable - applied to the rr record only.
// All four records read MXCSR and are marked mayRaiseFPException.
5552multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5553                            SDPatternOperator OpNode, SDNode VecNode,
5554                            X86FoldableSchedWrite sched, bit IsCommutable> {
5555  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Maskable register-register form operating on whole vector registers.
5556  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5557                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5558                           "$src2, $src1", "$src1, $src2",
5559                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5560                           Sched<[sched]>;
5561
  // Maskable register-memory form; the memory operand is matched through
  // _.ScalarIntMemFrags.
5562  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5563                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5564                         "$src2, $src1", "$src1, $src2",
5565                         (_.VT (VecNode _.RC:$src1,
5566                                        (_.ScalarIntMemFrags addr:$src2)))>,
5567                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Codegen-only forms on the scalar register class; these carry the OpNode
  // patterns and are explicitly predicated on HasAVX512.
5568  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5569  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5570                         (ins _.FRC:$src1, _.FRC:$src2),
5571                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5572                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5573                          Sched<[sched]> {
5574    let isCommutable = IsCommutable;
5575  }
5576  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5577                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5578                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5579                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5580                         (_.ScalarLdFrag addr:$src2)))]>,
5581                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5582  }
5583  }
5584}
5585
// Defines the explicit-rounding register form (rrb_Int) of a scalar FP binary
// instruction. It takes an extra AVX512RC:$rc operand, which the pattern
// passes to VecNode as an i32 target immediate, and is tagged EVEX_B and
// EVEX_RC.
//   VecNode - the rounding node; it takes the two vector operands plus $rc.
// Only Uses = [MXCSR] is set here; mayRaiseFPException is not set by this
// multiclass.
5586multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5587                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5588  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5589  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5590                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5591                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5592                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5593                          (i32 timm:$rc))>,
5594                          EVEX_B, EVEX_RC, Sched<[sched]>;
5595}
// Defines a scalar FP binary instruction that has a {sae} register form
// instead of an explicit-rounding form.
//   OpNode       - node matched on the scalar register class (_.FRC) by the
//                  codegen-only rr/rm records.
//   VecNode      - node matched on the vector register class by rr_Int/rm_Int.
//   SaeNode      - node matched by the {sae} record rrb_Int.
//   sched        - scheduling class; memory forms use sched.Folded together
//                  with sched.ReadAfterFold.
//   IsCommutable - applied to the rr record only.
//   EVEX2VexOvrd - name prefix; "rr" / "rm" is appended and passed to
//                  EVEX2VEXOverride on the codegen-only records.
5596multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5597                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5598                                X86FoldableSchedWrite sched, bit IsCommutable,
5599                                string EVEX2VexOvrd> {
5600  let ExeDomain = _.ExeDomain in {
  // Maskable vector-register forms; tagged SIMD_EXC rather than being wrapped
  // in a let for Uses / mayRaiseFPException.
5601  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5602                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5603                           "$src2, $src1", "$src1, $src2",
5604                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5605                           Sched<[sched]>, SIMD_EXC;
5606
5607  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5608                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5609                         "$src2, $src1", "$src1, $src2",
5610                         (_.VT (VecNode _.RC:$src1,
5611                                        (_.ScalarIntMemFrags addr:$src2)))>,
5612                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5613
  // Codegen-only forms on the scalar register class; these read MXCSR and may
  // raise FP exceptions.
5614  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5615      Uses = [MXCSR], mayRaiseFPException = 1 in {
5616  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5617                         (ins _.FRC:$src1, _.FRC:$src2),
5618                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5619                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5620                          Sched<[sched]>,
5621                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5622    let isCommutable = IsCommutable;
5623  }
5624  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5625                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5626                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5627                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5628                         (_.ScalarLdFrag addr:$src2)))]>,
5629                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5630                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5631  }
5632
  // {sae} register form: reads MXCSR, but mayRaiseFPException is not set on
  // this record.
5633  let Uses = [MXCSR] in
5634  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5635                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5636                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5637                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5638                            EVEX_B, Sched<[sched]>;
5639  }
5640}
5641
// Instantiates a scalar FP binary operation with rounding-control support for
// the three scalar types. Each record set combines avx512_fp_scalar (using
// OpNode and VecNode) with avx512_fp_scalar_round (using RndNode).
//   SSZ - f32: "ss" suffix, sched.PS.Scl, XS prefix, EVEX_CD8<32, CD8VT1>.
//   SDZ - f64: "sd" suffix, sched.PD.Scl, XD prefix plus VEX_W,
//         EVEX_CD8<64, CD8VT1>.
//   SHZ - f16: "sh" suffix, sched.PH.Scl, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
//         the only one gated on HasFP16.
5642multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5643                                SDNode VecNode, SDNode RndNode,
5644                                X86SchedWriteSizes sched, bit IsCommutable> {
5645  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5646                              sched.PS.Scl, IsCommutable>,
5647             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5648                              sched.PS.Scl>,
5649                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5650  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5651                              sched.PD.Scl, IsCommutable>,
5652             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5653                              sched.PD.Scl>,
5654                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5655  let Predicates = [HasFP16] in
5656    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5657                                VecNode, sched.PH.Scl, IsCommutable>,
5658               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5659                                sched.PH.Scl>,
5660                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5661}
5662
// Instantiates a scalar FP binary operation with {sae} support for the three
// scalar types via avx512_fp_scalar_sae. The EVEX2VexOvrd string is built
// from NAME plus "SS" / "SD" / "SH".
//   SSZ - f32, sched.PS.Scl, XS prefix, EVEX_CD8<32, CD8VT1>.
//   SDZ - f64, sched.PD.Scl, XD prefix plus VEX_W, EVEX_CD8<64, CD8VT1>.
//   SHZ - f16, sched.PH.Scl, T_MAP5XS, EVEX_CD8<16, CD8VT1>; gated on HasFP16
//         and additionally tagged NotEVEX2VEXConvertible.
5663multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5664                              SDNode VecNode, SDNode SaeNode,
5665                              X86SchedWriteSizes sched, bit IsCommutable> {
5666  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5667                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5668                              NAME#"SS">,
5669                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5670  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5671                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5672                              NAME#"SD">,
5673                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5674  let Predicates = [HasFP16] in {
5675    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5676                                VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5677                                NAME#"SH">,
5678                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5679                                NotEVEX2VEXConvertible;
5680  }
5681}
// Scalar FP arithmetic. Add and multiply are instantiated as commutable
// (last argument 1); subtract and divide are not (0).
5682defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5683                                 SchedWriteFAddSizes, 1>;
5684defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5685                                 SchedWriteFMulSizes, 1>;
5686defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5687                                 SchedWriteFAddSizes, 0>;
5688defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5689                                 SchedWriteFDivSizes, 0>;
// Min and max use the {sae} variant and are instantiated as non-commutable
// (last argument 0).
5690defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5691                               SchedWriteFCmpSizes, 0>;
5692defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5693                               SchedWriteFCmpSizes, 0>;
5694
5695// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5696// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// This multiclass defines only the codegen-only scalar-register records:
//   rr - register-register; isCommutable is hard-wired to 1.
//   rm - register-memory, loading the second operand through _.ScalarLdFrag.
// EVEX2VEXOvrd is a name prefix: "rr" / "rm" is appended and passed to
// EVEX2VEXOverride. Predicates is fixed to [HasAVX512] here, and neither
// MXCSR use nor mayRaiseFPException is set inside the multiclass.
5697multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5698                                    X86VectorVTInfo _, SDNode OpNode,
5699                                    X86FoldableSchedWrite sched,
5700                                    string EVEX2VEXOvrd> {
5701  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5702  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5703                         (ins _.FRC:$src1, _.FRC:$src2),
5704                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5705                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5706                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5707    let isCommutable = 1;
5708  }
5709  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5710                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5711                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5712                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5713                         (_.ScalarLdFrag addr:$src2)))]>,
5714                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5715                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5716  }
5717}
// Commutable min/max records (X86fminc / X86fmaxc). They reuse the mnemonics
// and opcodes of the ordinary scalar min/max (0x5D "vmin*", 0x5F "vmax*") and
// are tagged SIMD_EXC at each instantiation.
// f32 minimum.
5718defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5719                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5720                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5721
// f64 minimum.
5722defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5723                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5724                                         VEX_W, EVEX_4V, VEX_LIG,
5725                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5726
// f32 maximum.
5727defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5728                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5729                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5730
// f64 maximum.
5731defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5732                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5733                                         VEX_W, EVEX_4V, VEX_LIG,
5734                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5735
// f16 minimum / maximum; additionally tagged NotEVEX2VEXConvertible.
// NOTE(review): avx512_comutable_binop_s fixes Predicates to [HasAVX512] and
// no HasFP16 predicate is applied at these two instantiations, unlike the SHZ
// records of avx512_binop_s_round / avx512_binop_s_sae -- confirm this is
// intended.
5736defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5737                                         SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5738                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5739                                         NotEVEX2VEXConvertible;
5740defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5741                                         SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5742                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5743                                         NotEVEX2VEXConvertible;
5744
// Packed floating-point binary operation for one vector type. Emits three
// forms through AVX512_maskable_split:
//   rr  - both sources in registers,
//   rm  - second source is a full-vector load (_.LdFrag),
//   rmb - second source is a broadcast scalar load (_.BroadcastLdFrag, EVEX_B).
// OpNode is used for the first pattern argument and MaskOpNode for the
// second one passed to AVX512_maskable_split.
//   IsCommutable / IsKCommutable - commutability flags forwarded for the rr
//                                  form only; IsKCommutable defaults to
//                                  IsCommutable.
//   suffix              - appended to OpcodeStr; defaults to _.Suffix.
//   ClobberConstraint   - forwarded unchanged to every form.
//   MayRaiseFPException - value assigned to mayRaiseFPException (default 1).
5745multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5746                            SDPatternOperator MaskOpNode,
5747                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5748                            bit IsCommutable,
5749                            bit IsKCommutable = IsCommutable,
5750                            string suffix = _.Suffix,
5751                            string ClobberConstraint = "",
5752                            bit MayRaiseFPException = 1> {
  // Every form reads MXCSR and is declared free of unmodeled side effects.
5753  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5754      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5755  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5756                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5757                                 "$src2, $src1", "$src1, $src2",
5758                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5759                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5760                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
  // mayLoad is set explicitly for both memory forms.
5761  let mayLoad = 1 in {
5762    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5763                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5764                                   "$src2, $src1", "$src1, $src2",
5765                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5766                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5767                                   ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5768    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5769                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5770                                    "${src2}"#_.BroadcastStr#", $src1",
5771                                    "$src1, ${src2}"#_.BroadcastStr,
5772                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5773                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5774                                    ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5775    }
5776  }
5777}
5778
// Register-register packed FP form with an explicit rounding-control operand.
// Emits a single "rrb" record whose third input is AVX512RC:$rc; the pattern
// passes it to OpNodeRnd as (i32 timm:$rc). The record is tagged EVEX_B and
// EVEX_RC and reads MXCSR.
//   suffix            - appended to OpcodeStr; defaults to _.Suffix.
//   ClobberConstraint - forwarded to AVX512_maskable.
5779multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5780                                  SDPatternOperator OpNodeRnd,
5781                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5782                                  string suffix = _.Suffix,
5783                                  string ClobberConstraint = ""> {
5784  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5785  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5786                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5787                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5788                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
                  // Positional arguments of AVX512_maskable; they are spelled
                  // out only so that ClobberConstraint can be supplied.
                  // NOTE(review): the three zeros are presumably the
                  // commutability flags - confirm against AVX512_maskable.
5789                  0, 0, 0, vselect_mask, ClobberConstraint>,
5790                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5791}
5792
// Register-register packed FP form printed with the "{sae}" modifier.
// Emits a single "rrb" record tagged EVEX_B; unlike the rounding variant it
// takes no extra operand, and the pattern is OpNodeSAE applied to the two
// register sources. The record reads MXCSR.
5793multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5794                                SDPatternOperator OpNodeSAE,
5795                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5796  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5797  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5798                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5799                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5800                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5801                  EVEX_4V, EVEX_B, Sched<[sched]>;
5802}
5803
// Instantiates avx512_fp_packed for the PS (f32) and PD (f64) element types
// at 512, 256 and 128 bits.
//   prd               - predicate required by every variant; the 128/256-bit
//                       variants additionally require HasVLX.
//   sched             - per-type scheduling sizes (sched.PS.* / sched.PD.*).
//   IsCommutable      - forwarded as avx512_fp_packed's IsCommutable.
//   IsPD128Commutable - used only for PDZ128 (see below); defaults to
//                       IsCommutable.
5804multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5805                             SDPatternOperator MaskOpNode,
5806                             Predicate prd, X86SchedWriteSizes sched,
5807                             bit IsCommutable = 0,
5808                             bit IsPD128Commutable = IsCommutable> {
5809  let Predicates = [prd] in {
5810  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5811                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5812                              EVEX_CD8<32, CD8VF>;
5813  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5814                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5815                              EVEX_CD8<64, CD8VF>;
5816  }
5817
5818    // Define only if AVX512VL feature is present.
5819  let Predicates = [prd, HasVLX] in {
5820    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5821                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5822                                   EVEX_CD8<32, CD8VF>;
5823    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5824                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5825                                   EVEX_CD8<32, CD8VF>;
    // PDZ128 is the only variant that passes two distinct flags:
    // IsPD128Commutable as IsCommutable and IsCommutable as IsKCommutable.
5826    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5827                                   sched.PD.XMM, IsPD128Commutable,
5828                                   IsCommutable>, EVEX_V128, PD, VEX_W,
5829                                   EVEX_CD8<64, CD8VF>;
5830    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5831                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5832                                   EVEX_CD8<64, CD8VF>;
5833  }
5834}
5835
// Half-precision (PH, f16) counterpart of avx512_fp_binop_p. Instantiates
// avx512_fp_packed at 512 bits under HasFP16, and at 128/256 bits under
// HasVLX + HasFP16. All variants use the T_MAP5PS prefix class and
// EVEX_CD8<16, CD8VF>, and take their scheduling from sched.PH.*.
5836multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5837                              SDPatternOperator MaskOpNode,
5838                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5839  let Predicates = [HasFP16] in {
5840    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5841                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5842                                EVEX_CD8<16, CD8VF>;
5843  }
5844  let Predicates = [HasVLX, HasFP16] in {
5845    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5846                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5847                                   EVEX_CD8<16, CD8VF>;
5848    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5849                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5850                                   EVEX_CD8<16, CD8VF>;
5851  }
5852}
5853
// Embedded-rounding forms of a packed FP binary operation. Instantiates
// avx512_fp_round_packed for the 512-bit PH, PS and PD types only; no 128- or
// 256-bit variants are defined here. The PH variant is predicated on HasFP16;
// PS and PD get no Predicates list inside this multiclass.
5854let Uses = [MXCSR] in
5855multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5856                                   X86SchedWriteSizes sched> {
5857  let Predicates = [HasFP16] in {
5858    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5859                                      v32f16_info>,
5860                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5861  }
5862  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5863                                    v16f32_info>,
5864                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5865  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5866                                    v8f64_info>,
5867                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5868}
5869
// "{sae}" forms of a packed FP binary operation. Instantiates
// avx512_fp_sae_packed for the 512-bit PH, PS and PD types only. The PH
// variant is predicated on HasFP16; PS and PD get no Predicates list inside
// this multiclass.
// Note: the parameter is named OpNodeRnd, but it is passed on as the
// OpNodeSAE argument of avx512_fp_sae_packed.
5870let Uses = [MXCSR] in
5871multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5872                                 X86SchedWriteSizes sched> {
5873  let Predicates = [HasFP16] in {
5874    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5875                                    v32f16_info>,
5876                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5877  }
5878  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5879                                  v16f32_info>,
5880                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5881  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5882                                  v8f64_info>,
5883                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5884}
5885
// Packed FP arithmetic. Each defm combines the PS/PD forms
// (avx512_fp_binop_p), the PH forms (avx512_fp_binop_ph) and the 512-bit
// embedded-rounding forms (avx512_fp_binop_p_round). The first node is an
// any_* operator and the second the plain f* node. VADD and VMUL pass
// IsCommutable = 1; VSUB and VDIV leave it at its default of 0.
5886defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5887                              SchedWriteFAddSizes, 1>,
5888            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5889            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5890defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5891                              SchedWriteFMulSizes, 1>,
5892            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5893            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5894defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5895                              SchedWriteFAddSizes>,
5896            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5897            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5898defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5899                              SchedWriteFDivSizes>,
5900            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5901            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
// Packed min/max: non-commutable (IsCommutable = 0) and paired with the
// "{sae}" forms instead of embedded-rounding forms.
5902defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5903                              SchedWriteFCmpSizes, 0>,
5904            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5905            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5906defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5907                              SchedWriteFCmpSizes, 0>,
5908            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5909            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max variants: same opcodes and mnemonics as VMIN/VMAX, but
// matching X86fminc/X86fmaxc with IsCommutable = 1. They are isCodeGenOnly
// and have no "{sae}" forms.
5910let isCodeGenOnly = 1 in {
5911  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5912                                 SchedWriteFCmpSizes, 1>,
5913               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5914                                 SchedWriteFCmpSizes, 1>;
5915  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5916                                 SchedWriteFCmpSizes, 1>,
5917               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5918                                 SchedWriteFCmpSizes, 1>;
5919}
// FP-domain logic operations, predicated on HasDQI. They are declared with
// null_frag (no selection patterns from these records), an empty Uses list
// and mayRaiseFPException = 0. VANDN is the only non-commutable one.
5920let Uses = []<Register>, mayRaiseFPException = 0 in {
5921defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5922                               SchedWriteFLogicSizes, 1>;
5923defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5924                               SchedWriteFLogicSizes, 0>;
5925defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5926                               SchedWriteFLogicSizes, 1>;
5927defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5928                               SchedWriteFLogicSizes, 1>;
5929}
5930
// Packed two-source FP operation (instantiated below for VSCALEF) for one
// vector type. Emits rr, rm (full-vector load) and rmb (broadcast scalar
// load, EVEX_B) forms through AVX512_maskable. All forms read MXCSR and are
// marked mayRaiseFPException.
5931multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5932                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5933  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5934  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5935                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5936                  "$src2, $src1", "$src1, $src2",
5937                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5938                  EVEX_4V, Sched<[sched]>;
5939  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5940                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5941                  "$src2, $src1", "$src1, $src2",
5942                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5943                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5944  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5945                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5946                   "${src2}"#_.BroadcastStr#", $src1",
5947                   "$src1, ${src2}"#_.BroadcastStr,
5948                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5949                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5950  }
5951}
5952
// Scalar counterpart of avx512_fp_scalef_p. Emits rr and rm forms through
// AVX512_maskable_scalar; operands use the vector register class _.RC, and
// the memory form takes _.IntScalarMemOp matched via _.ScalarIntMemFrags.
// Both forms read MXCSR and are marked mayRaiseFPException. EVEX_4V is not
// applied here, so the instantiating defm supplies it.
5953multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5954                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5955  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5956  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5957                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5958                  "$src2, $src1", "$src1, $src2",
5959                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5960                  Sched<[sched]>;
5961  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5962                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5963                  "$src2, $src1", "$src1, $src2",
5964                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5965                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5966  }
5967}
5968
// Full set of scalef forms: packed (opc) and scalar (opcScaler) variants for
// f16, f32 and f64.
//   opc       - opcode of the packed forms.
//   opcScaler - opcode of the scalar forms.
//   sched     - scheduling widths (Scl / XMM / YMM / ZMM).
// The 512-bit packed and the scalar variants also get embedded-rounding
// forms; the 128/256-bit packed variants do not.
5969multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5970                                X86SchedWriteWidths sched> {
  // f16 variants require HasFP16 and use the T_MAP6PD prefix class.
5971  let Predicates = [HasFP16] in {
5972    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5973               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5974                                EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
5975    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5976               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5977                             EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
5978  }
  // f32/f64 512-bit packed and scalar variants use T8PD and get no
  // Predicates list inside this multiclass.
5979  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5980             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5981                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
5982  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5983             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5984                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5985  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5986             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5987                                    X86scalefsRnd, sched.Scl>,
5988                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
5989  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5990             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5991                                    X86scalefsRnd, sched.Scl>,
5992                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
5993
5994  // Define only if AVX512VL feature is present.
5995  let Predicates = [HasVLX] in {
5996    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5997                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
5998    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5999                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
6000    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
6001                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
6002    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
6003                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
6004  }
6005
  // 128/256-bit f16 variants need both HasFP16 and HasVLX.
6006  let Predicates = [HasFP16, HasVLX] in {
6007    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6008                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6009    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6010                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6011  }
6012}
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D, scheduled with
// SchedWriteFAdd. Every generated record is marked NotEVEX2VEXConvertible.
6013defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6014                                    SchedWriteFAdd>, NotEVEX2VEXConvertible;
6015
6016//===----------------------------------------------------------------------===//
6017// AVX-512  VPTESTM instructions
6018//===----------------------------------------------------------------------===//
6019
// Register-register (rr) and register-memory (rm) test forms that write a
// mask register (_.KRC) from two vector sources. Both are built with
// AVX512_maskable_cmp and null_frag patterns, i.e. these records carry no
// selection patterns of their own (see the NOTE below).
6020multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6021                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6022  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6023  // There are just too many permutations due to commutability and bitcasts.
6024  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // NOTE(review): the trailing 1 is presumably the IsCommutable argument of
  // AVX512_maskable_cmp - confirm against its definition. It is given for
  // the rr form only.
6025  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6026                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6027                      "$src2, $src1", "$src1, $src2",
6028                   (null_frag), (null_frag), 1>,
6029                   EVEX_4V, Sched<[sched]>;
  // No pattern is attached, so mayLoad has to be stated explicitly.
6030  let mayLoad = 1 in
6031  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6032                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6033                       "$src2, $src1", "$src1, $src2",
6034                   (null_frag), (null_frag)>,
6035                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6036                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6037  }
6038}
6039
// Broadcast-memory (rmb) test form: the second source is a scalar memory
// operand printed with _.BroadcastStr, and the record is tagged EVEX_B. Like
// avx512_vptest it has null_frag patterns, so mayLoad is set explicitly.
6040multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6041                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6042  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6043  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6044                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6045                    "${src2}"#_.BroadcastStr#", $src1",
6046                    "$src1, ${src2}"#_.BroadcastStr,
6047                    (null_frag), (null_frag)>,
6048                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6049                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6050}
6051
// Instantiates the rr/rm (avx512_vptest) and rmb (avx512_vptest_mb) test
// forms for the 512-, 256- and 128-bit members of an AVX512VLVectorVTInfo.
// The 512-bit variant requires HasAVX512; the narrower ones additionally
// require HasVLX.
6052multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6053                                  X86SchedWriteWidths sched,
6054                                  AVX512VLVectorVTInfo _> {
6055  let Predicates  = [HasAVX512] in
6056  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
6057           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6058
6059  let Predicates = [HasAVX512, HasVLX] in {
6060  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
6061              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6062  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
6063              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6064  }
6065}
6066
// Dword ("d", i32) and qword ("q", i64) element variants of the test
// instruction. The same opcode is used for both; the Q variant adds VEX_W.
6067multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6068                            X86SchedWriteWidths sched> {
6069  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6070                                 avx512vl_i32_info>;
6071  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6072                                 avx512vl_i64_info>, VEX_W;
6073}
6074
// Word ("w", i16) and byte ("b", i8) element variants of the test
// instruction. Only avx512_vptest (rr/rm) is instantiated, so these element
// sizes get no broadcast (rmb) form. The 512-bit variants require HasBWI;
// the 128/256-bit ones require HasVLX + HasBWI. The W variants add VEX_W.
6075multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6076                            X86SchedWriteWidths sched> {
6077  let Predicates = [HasBWI] in {
6078  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6079                            v32i16_info>, EVEX_V512, VEX_W;
6080  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6081                            v64i8_info>, EVEX_V512;
6082  }
6083
6084  let Predicates = [HasVLX, HasBWI] in {
6085  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6086                            v16i16x_info>, EVEX_V256, VEX_W;
6087  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6088                            v8i16x_info>, EVEX_V128, VEX_W;
6089  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6090                            v32i8x_info>, EVEX_V256;
6091  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6092                            v16i8x_info>, EVEX_V128;
6093  }
6094}
6095
// Convenience multiclass with no body of its own: it inherits the byte/word
// forms (opc_wb) and the dword/qword forms (opc_dq) for one mnemonic.
6096multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6097                                   X86SchedWriteWidths sched> :
6098  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6099  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6100
// VPTESTM and VPTESTNM share opcodes (0x26 for byte/word, 0x27 for
// dword/qword) and scheduling; in these definitions they differ only in the
// prefix class: T8PD for VPTESTM, T8XS for VPTESTNM.
6101defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6102                                         SchedWriteVecLogic>, T8PD;
6103defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6104                                         SchedWriteVecLogic>, T8XS;
6105
6106//===----------------------------------------------------------------------===//
6107// AVX-512  Shift instructions
6108//===----------------------------------------------------------------------===//
6109
// Shift-by-immediate forms for one vector type.
//   ri - register source shifted by a u8 immediate (format ImmFormR).
//   mi - full-vector memory source (loaded via _.LdFrag) shifted by a u8
//        immediate (format ImmFormM).
// The immediate is matched as (i8 timm:$src2). The mi form schedules with
// sched.Folded only; its sole non-immediate source is the memory operand.
6110multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6111                            string OpcodeStr, SDNode OpNode,
6112                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6113  let ExeDomain = _.ExeDomain in {
6114  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6115                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6116                      "$src2, $src1", "$src1, $src2",
6117                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6118                   Sched<[sched]>;
6119  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6120                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6121                       "$src2, $src1", "$src1, $src2",
6122                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6123                          (i8 timm:$src2)))>,
6124                   Sched<[sched.Folded]>;
6125  }
6126}
6127
// Shift-by-immediate form with a broadcast memory source (mbi): the first
// operand is a scalar memory operand matched via _.BroadcastLdFrag and
// printed with _.BroadcastStr; the record is tagged EVEX_B.
6128multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6129                             string OpcodeStr, SDNode OpNode,
6130                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6131  let ExeDomain = _.ExeDomain in
6132  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6133                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6134      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6135     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6136     EVEX_B, Sched<[sched.Folded]>;
6137}
6138
// Shift forms whose count comes from a second vector operand rather than an
// immediate. The count operand is a VR128X register (rr) or an i128mem load
// (rm) regardless of the width of the shifted vector.
//   SrcVT - value type given to the count operand in the pattern.
//   _     - type info of the shifted vector / destination.
6139multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6140                            X86FoldableSchedWrite sched, ValueType SrcVT,
6141                            X86VectorVTInfo _> {
6142   // src2 is always 128-bit
6143  let ExeDomain = _.ExeDomain in {
6144  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6145                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6146                      "$src2, $src1", "$src1, $src2",
6147                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6148                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
6149  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6150                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6151                       "$src2, $src1", "$src1, $src2",
6152                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6153                   AVX512BIBase,
6154                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6155  }
6156}
6157
// Instantiates avx512_shift_rrm for the 512-, 256- and 128-bit members of
// VTInfo. The 512-bit variant requires prd; the narrower ones additionally
// require HasVLX.
// The EVEX_CD8 tuple type changes with the vector width (CD8VQ / CD8VH /
// CD8VF). NOTE(review): presumably this keeps the compressed-displacement
// scale matched to the fixed 128-bit count operand - confirm against the
// CD8 tuple definitions.
6158multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6159                              X86SchedWriteWidths sched, ValueType SrcVT,
6160                              AVX512VLVectorVTInfo VTInfo,
6161                              Predicate prd> {
6162  let Predicates = [prd] in
6163  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6164                               VTInfo.info512>, EVEX_V512,
6165                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6166  let Predicates = [prd, HasVLX] in {
6167  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6168                               VTInfo.info256>, EVEX_V256,
6169                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6170  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6171                               VTInfo.info128>, EVEX_V128,
6172                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6173  }
6174}
6175
// Dword, qword and word variants of a vector-count shift, each with its own
// opcode (opcd / opcq / opcw). D and Q are predicated on HasAVX512, W on
// HasBWI; the Q variant adds VEX_W.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible for
//                             the Q variant only (default 0).
6176multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6177                              string OpcodeStr, SDNode OpNode,
6178                              X86SchedWriteWidths sched,
6179                              bit NotEVEX2VEXConvertibleQ = 0> {
6180  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6181                              avx512vl_i32_info, HasAVX512>;
6182  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6183  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6184                              avx512vl_i64_info, HasAVX512>, VEX_W;
6185  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6186                              avx512vl_i16_info, HasBWI>;
6187}
6188
// Instantiates the immediate-shift forms (ri/mi from avx512_shift_rmi and
// mbi from avx512_shift_rmbi) for the 512-, 256- and 128-bit members of
// VTInfo. The 512-bit variant requires HasAVX512; the narrower ones
// additionally require HasVLX.
6189multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6190                                  string OpcodeStr, SDNode OpNode,
6191                                  X86SchedWriteWidths sched,
6192                                  AVX512VLVectorVTInfo VTInfo> {
6193  let Predicates = [HasAVX512] in
6194  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6195                              sched.ZMM, VTInfo.info512>,
6196             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6197                               VTInfo.info512>, EVEX_V512;
6198  let Predicates = [HasAVX512, HasVLX] in {
6199  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6200                              sched.YMM, VTInfo.info256>,
6201             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6202                               VTInfo.info256>, EVEX_V256;
6203  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6204                              sched.XMM, VTInfo.info128>,
6205             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6206                               VTInfo.info128>, EVEX_V128;
6207  }
6208}
6209
// Word-element immediate-shift forms. Only avx512_shift_rmi (ri/mi) is
// instantiated, so there is no broadcast (mbi) form for words. The 512-bit
// variant requires HasBWI; the 128/256-bit ones require HasVLX + HasBWI.
// All variants are tagged VEX_WIG. OpcodeStr is used as given, so callers
// pass the full mnemonic including the "w" suffix.
6210multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6211                              string OpcodeStr, SDNode OpNode,
6212                              X86SchedWriteWidths sched> {
6213  let Predicates = [HasBWI] in
6214  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6215                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
6216  let Predicates = [HasVLX, HasBWI] in {
6217  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6218                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
6219  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6220                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
6221  }
6222}
6223
// Dword ("d") and qword ("q") immediate-shift variants, with separate
// opcodes opcd / opcq. The Q variant adds VEX_W.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible for
//                             the Q variant only (default 0).
6224multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6225                               Format ImmFormR, Format ImmFormM,
6226                               string OpcodeStr, SDNode OpNode,
6227                               X86SchedWriteWidths sched,
6228                               bit NotEVEX2VEXConvertibleQ = 0> {
6229  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6230                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6231  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6232  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6233                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
6234}
6235
// Immediate shifts. The MRMnr/MRMnm format pair selects the operation within
// the shared 0x71/0x72/0x73 opcodes: MRM2 = VPSRL, MRM6 = VPSLL, MRM4 = VPSRA.
6236defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6237                                 SchedWriteVecShiftImm>,
6238             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6239                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6240
6241defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6242                                 SchedWriteVecShiftImm>,
6243             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6244                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6245
// VPSRA uses opcode 0x72 for both D and Q; the trailing 1 sets
// NotEVEX2VEXConvertibleQ for the Q variant.
6246defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6247                                 SchedWriteVecShiftImm, 1>,
6248             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6249                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6250
// Immediate rotates: dword/qword only (no word form), opcode 0x72 for both,
// with MRM0 = VPROR and MRM1 = VPROL.
6251defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6252                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6253defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6254                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6255
// Shifts by a vector count (avx512_shift_types); opcodes are given in
// D, Q, W order. These reuse the VPSLL/VPSRA/VPSRL defm prefixes of the
// immediate forms above. For VPSRA the trailing 1 again sets
// NotEVEX2VEXConvertibleQ.
6256defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6257                                SchedWriteVecShift>;
6258defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6259                                SchedWriteVecShift, 1>;
6260defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6261                                SchedWriteVecShift>;
6262
6263// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the 128/256-bit source into the low part of an
// otherwise undefined (IMPLICIT_DEF) v8i64 register, runs the 512-bit
// VPSRAQZrr / VPSRAQZri instruction, and extracts the low sub-register again.
// The shift-count operand ($src2) is passed through unchanged: it is a
// VR128X register for X86vsra and an i8 immediate for X86vsrai.
6264let Predicates = [HasAVX512, NoVLX] in {
6265  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6266            (EXTRACT_SUBREG (v8i64
6267              (VPSRAQZrr
6268                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6269                 VR128X:$src2)), sub_ymm)>;
6270
6271  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6272            (EXTRACT_SUBREG (v8i64
6273              (VPSRAQZrr
6274                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6275                 VR128X:$src2)), sub_xmm)>;
6276
6277  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6278            (EXTRACT_SUBREG (v8i64
6279              (VPSRAQZri
6280                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6281                 timm:$src2)), sub_ymm)>;
6282
6283  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6284            (EXTRACT_SUBREG (v8i64
6285              (VPSRAQZri
6286                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6287                 timm:$src2)), sub_xmm)>;
6288}
6289
6290//===-------------------------------------------------------------------===//
6291// Variable Bit Shifts
6292//===-------------------------------------------------------------------===//
6293
// Register-register (rr) and register-memory (rm) forms of a two-source
// vector operation in which OpNode is applied to $src1 and a same-typed
// $src2. Both forms are built with AVX512_maskable. Besides the variable
// shifts, this multiclass is reused further down for VPERM* and VPSHUFB.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SelectionDAG node matched by the pattern.
//   sched     - scheduling class; the rm form uses sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info (register class, VT, memory operand, ...).
6294multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6295                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6296  let ExeDomain = _.ExeDomain in {
6297  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6298                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6299                      "$src2, $src1", "$src1, $src2",
6300                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6301                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  // Second source is loaded from memory through _.LdFrag.
6302  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6303                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6304                       "$src2, $src1", "$src1, $src2",
6305                   (_.VT (OpNode _.RC:$src1,
6306                   (_.VT (_.LdFrag addr:$src2))))>,
6307                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6308                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6309  }
6310}
6311
// Broadcast-memory (rmb) companion of avx512_var_shift: $src2 is a scalar
// memory operand (_.ScalarMemOp) that the pattern reads through
// _.BroadcastLdFrag. The assembly strings append _.BroadcastStr to the memory
// operand and the instruction carries EVEX_B.
6312multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6313                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6314  let ExeDomain = _.ExeDomain in
6315  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6316                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6317                    "${src2}"#_.BroadcastStr#", $src1",
6318                    "$src1, ${src2}"#_.BroadcastStr,
6319                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6320                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6321                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6322}
6323
// Instantiates the rr/rm/rmb forms (avx512_var_shift + avx512_var_shift_mb)
// for all three vector widths of the given AVX512VLVectorVTInfo:
//   Z    - 512-bit, requires HasAVX512, scheduled with sched.ZMM.
//   Z256 - 256-bit, requires HasAVX512 and HasVLX, scheduled with sched.YMM.
//   Z128 - 128-bit, requires HasAVX512 and HasVLX, scheduled with sched.XMM.
6324multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6325                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6326  let Predicates  = [HasAVX512] in
6327  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6328           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6329
6330  let Predicates = [HasAVX512, HasVLX] in {
6331  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6332              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6333  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6334              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6335  }
6336}
6337
// Instantiates avx512_var_shift_sizes for 32-bit ("d") and 64-bit ("q")
// elements. Both variants use the same opcode; the Q variant additionally
// carries VEX_W.
6338multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6339                                  SDNode OpNode, X86SchedWriteWidths sched> {
6340  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6341                                 avx512vl_i32_info>;
6342  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6343                                 avx512vl_i64_info>, VEX_W;
6344}
6345
6346// Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits two selection patterns, one for _.info256.VT and one for
// _.info128.VT. Both operands are inserted into the low part of undefined
// (IMPLICIT_DEF) 512-bit registers, the instruction named OpcodeStr#"Zrr" is
// applied, and the low ymm/xmm sub-register of the result is extracted.
//   _         - vector type infos for the element type being lowered.
//   OpcodeStr - record-name stem of the instruction, e.g. "VPSRAVQ".
//   OpNode    - SelectionDAG node to match.
//   p         - predicate list guarding the patterns (supplied by the caller).
6347multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6348                                     SDNode OpNode, list<Predicate> p> {
6349  let Predicates = p in {
6350  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6351                                  (_.info256.VT _.info256.RC:$src2))),
6352            (EXTRACT_SUBREG
6353                (!cast<Instruction>(OpcodeStr#"Zrr")
6354                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6355                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6356             sub_ymm)>;
6357
6358  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6359                                  (_.info128.VT _.info128.RC:$src2))),
6360            (EXTRACT_SUBREG
6361                (!cast<Instruction>(OpcodeStr#"Zrr")
6362                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6363                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6364             sub_xmm)>;
6365  }
6366}
// Word-element (i16) variable shifts. Only the rr/rm forms from
// avx512_var_shift are instantiated here; no broadcast (rmb) form is created.
//   WZ            - 512-bit (v32i16), requires HasBWI.
//   WZ256 / WZ128 - 256/128-bit (v16i16 / v8i16), require HasVLX and HasBWI.
// All three carry VEX_W.
6367multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6368                              SDNode OpNode, X86SchedWriteWidths sched> {
6369  let Predicates = [HasBWI] in
6370  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6371              EVEX_V512, VEX_W;
6372  let Predicates = [HasVLX, HasBWI] in {
6373
6374  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6375              EVEX_V256, VEX_W;
6376  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6377              EVEX_V128, VEX_W;
6378  }
6379}
6380
// Per-element variable shifts: D/Q forms via avx512_var_shift_types plus the
// word form via avx512_var_shift_w (which uses a different opcode).
6381defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6382              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6383
6384defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6385              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6386
6387defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6388              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6389
// Per-element variable rotates match the generic rotr/rotl nodes; only D/Q
// forms are instantiated.
6390defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6391defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6392
// Without VLX, select the 512-bit instruction for the 128/256-bit types:
// the i64 arithmetic right shift under HasAVX512, and the three i16 shifts
// under HasBWI.
6393defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6394defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6395defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6396defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6397
6398
6399// Use the 512-bit VPROLV (variable) and VPROL (immediate) instructions to
// implement v2i64/v4i64 + v4i32/v8i32 rotate-left in case NoVLX.
// Every pattern inserts the narrow source(s) into the low part of an
// undefined (IMPLICIT_DEF) 512-bit register, applies the Z-form instruction,
// and extracts the low xmm/ymm sub-register of the result.
6400let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate, 64-bit elements (VPROLVQZrr).
6401  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6402            (EXTRACT_SUBREG (v8i64
6403              (VPROLVQZrr
6404                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6405                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6406                       sub_xmm)>;
6407  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6408            (EXTRACT_SUBREG (v8i64
6409              (VPROLVQZrr
6410                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6411                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6412                       sub_ymm)>;
6413
  // Variable rotate, 32-bit elements (VPROLVDZrr).
6414  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6415            (EXTRACT_SUBREG (v16i32
6416              (VPROLVDZrr
6417                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6418                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6419                        sub_xmm)>;
6420  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6421            (EXTRACT_SUBREG (v16i32
6422              (VPROLVDZrr
6423                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6424                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6425                        sub_ymm)>;
6426
  // Rotate by immediate, 64-bit elements (VPROLQZri).
6427  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6428            (EXTRACT_SUBREG (v8i64
6429              (VPROLQZri
6430                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6431                        timm:$src2)), sub_xmm)>;
6432  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6433            (EXTRACT_SUBREG (v8i64
6434              (VPROLQZri
6435                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6436                       timm:$src2)), sub_ymm)>;
6437
  // Rotate by immediate, 32-bit elements (VPROLDZri).
6438  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6439            (EXTRACT_SUBREG (v16i32
6440              (VPROLDZri
6441                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6442                        timm:$src2)), sub_xmm)>;
6443  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6444            (EXTRACT_SUBREG (v16i32
6445              (VPROLDZri
6446                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6447                        timm:$src2)), sub_ymm)>;
6448}
6449
6450// Use the 512-bit VPRORV (variable) and VPROR (immediate) instructions to
// implement v2i64/v4i64 + v4i32/v8i32 rotate-right in case NoVLX.
// Every pattern inserts the narrow source(s) into the low part of an
// undefined (IMPLICIT_DEF) 512-bit register, applies the Z-form instruction,
// and extracts the low xmm/ymm sub-register of the result.
6451let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate, 64-bit elements (VPRORVQZrr).
6452  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6453            (EXTRACT_SUBREG (v8i64
6454              (VPRORVQZrr
6455                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6456                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6457                       sub_xmm)>;
6458  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6459            (EXTRACT_SUBREG (v8i64
6460              (VPRORVQZrr
6461                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6462                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6463                       sub_ymm)>;
6464
  // Variable rotate, 32-bit elements (VPRORVDZrr).
6465  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6466            (EXTRACT_SUBREG (v16i32
6467              (VPRORVDZrr
6468                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6469                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6470                        sub_xmm)>;
6471  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6472            (EXTRACT_SUBREG (v16i32
6473              (VPRORVDZrr
6474                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6475                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6476                        sub_ymm)>;
6477
  // Rotate by immediate, 64-bit elements (VPRORQZri).
6478  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6479            (EXTRACT_SUBREG (v8i64
6480              (VPRORQZri
6481                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6482                        timm:$src2)), sub_xmm)>;
6483  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6484            (EXTRACT_SUBREG (v8i64
6485              (VPRORQZri
6486                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6487                       timm:$src2)), sub_ymm)>;
6488
  // Rotate by immediate, 32-bit elements (VPRORDZri).
6489  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6490            (EXTRACT_SUBREG (v16i32
6491              (VPRORDZri
6492                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6493                        timm:$src2)), sub_xmm)>;
6494  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6495            (EXTRACT_SUBREG (v16i32
6496              (VPRORDZri
6497                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6498                        timm:$src2)), sub_ymm)>;
6499}
6500
6501//===-------------------------------------------------------------------===//
6502// 1-src variable permutation VPERMW/D/Q
6503//===-------------------------------------------------------------------===//
6504
// Variable permute for 32/64-bit elements. Reuses avx512_var_shift and
// avx512_var_shift_mb (rr/rm/rmb forms) with the permute OpNode. Only the
// 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 + HasVLX) widths are
// instantiated; no 128-bit variant is created. Unlike the shift multiclasses,
// `sched` is a single X86FoldableSchedWrite used for both widths.
6505multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6506                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6507  let Predicates  = [HasAVX512] in
6508  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6509           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6510
6511  let Predicates = [HasAVX512, HasVLX] in
6512  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6513              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6514}
6515
// Immediate-controlled permute. Combines avx512_shift_rmi (register and
// memory forms) with avx512_shift_rmbi (broadcast-memory form) for the
// 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 + HasVLX) widths only.
//   ImmFormR / ImmFormM - Format of the register and of the memory form; the
//                         broadcast form is given ImmFormM as well.
6516multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6517                                 string OpcodeStr, SDNode OpNode,
6518                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6519  let Predicates = [HasAVX512] in
6520  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6521                              sched, VTInfo.info512>,
6522             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6523                               sched, VTInfo.info512>, EVEX_V512;
6524  let Predicates = [HasAVX512, HasVLX] in
6525  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6526                              sched, VTInfo.info256>,
6527             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6528                               sched, VTInfo.info256>, EVEX_V256;
6529}
6530
// Variable permute for byte/word elements. Uses only avx512_var_shift (rr/rm
// forms, no broadcast form) at all three widths.
//   prd - feature predicate required by every width; the 256/128-bit
//         variants additionally require HasVLX.
6531multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6532                              Predicate prd, SDNode OpNode,
6533                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6534  let Predicates = [prd] in
6535  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6536              EVEX_V512 ;
6537  let Predicates = [HasVLX, prd] in {
6538  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6539              EVEX_V256 ;
6540  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6541              EVEX_V128 ;
6542  }
6543}
6544
// Word and byte variable permutes share opcode 0x8D; VPERMW carries VEX_W and
// requires HasBWI, VPERMB requires HasVBMI.
6545defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6546                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6547defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6548                               WriteVarShuffle256, avx512vl_i8_info>;
6549
// Variable (vector-controlled) D/Q/PS/PD permutes. Each integer/FP pair shares
// an opcode; the 64-bit element variants add VEX_W.
6550defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6551                                    WriteVarShuffle256, avx512vl_i32_info>;
6552defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6553                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6554defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6555                                     WriteFVarShuffle256, avx512vl_f32_info>;
6556defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6557                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6558
// Immediate-controlled forms of VPERMQ / VPERMPD (X86VPermi). These reuse the
// VPERMQ / VPERMPD defm prefixes of the variable forms above.
6559defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6560                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6561                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6562defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6563                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6564                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6565
6566//===----------------------------------------------------------------------===//
6567// AVX-512 - VPERMIL
6568//===----------------------------------------------------------------------===//
6569
// Vector-controlled VPERMIL forms (rr, rm, rmb). The data operand and result
// use the type info `_`, while the control operand ($src2) uses `Ctrl`: its
// register class, value type and load fragments come from Ctrl.
//   OpcVar - opcode of the variable (vector-controlled) form.
//   OpNode - SelectionDAG node matched, taking (data, control).
6570multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6571                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6572                             X86VectorVTInfo Ctrl> {
6573  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6574                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6575                  "$src2, $src1", "$src1, $src2",
6576                  (_.VT (OpNode _.RC:$src1,
6577                               (Ctrl.VT Ctrl.RC:$src2)))>,
6578                  T8PD, EVEX_4V, Sched<[sched]>;
  // Control vector loaded from memory through Ctrl.LdFrag.
6579  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6580                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6581                  "$src2, $src1", "$src1, $src2",
6582                  (_.VT (OpNode
6583                           _.RC:$src1,
6584                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6585                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6586                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast control: the operand class and broadcast string come from `_`,
  // the broadcast load fragment from Ctrl.
6587  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6588                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6589                   "${src2}"#_.BroadcastStr#", $src1",
6590                   "$src1, ${src2}"#_.BroadcastStr,
6591                   (_.VT (OpNode
6592                            _.RC:$src1,
6593                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6594                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6595                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6596}
6597
// Instantiates avx512_permil_vec with the X86VPermilpv node at all three
// widths: Z (512-bit) under HasAVX512, Z128 and Z256 under HasAVX512 + HasVLX.
// `_` supplies the data type infos and `Ctrl` the control-vector type infos.
6598multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6599                                    X86SchedWriteWidths sched,
6600                                    AVX512VLVectorVTInfo _,
6601                                    AVX512VLVectorVTInfo Ctrl> {
6602  let Predicates = [HasAVX512] in {
6603    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6604                                  _.info512, Ctrl.info512>, EVEX_V512;
6605  }
6606  let Predicates = [HasAVX512, HasVLX] in {
6607    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6608                                  _.info128, Ctrl.info128>, EVEX_V128;
6609    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6610                                  _.info256, Ctrl.info256>, EVEX_V256;
6611  }
6612}
6613
// Defines both flavours of a VPERMIL instruction under the same NAME prefix:
//   - the vector-controlled form (opcode OpcVar, node X86VPermilpv) through
//     avx512_permil_vec_common, scheduled with SchedWriteFVarShuffle;
//   - the immediate-controlled form (opcode OpcImm, node X86VPermilpi) through
//     avx512_shift_rmi_sizes, scheduled with SchedWriteFShuffle.
6614multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6615                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6616  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6617                                      _, Ctrl>;
6618  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6619                                    X86VPermilpi, SchedWriteFShuffle, _>,
6620                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6621}
6622
// VPERMILPS / VPERMILPD: FP data types paired with same-width integer control
// types (f32 with i32, f64 with i64). Arguments are <mnemonic, immediate-form
// opcode, variable-form opcode, data infos, control infos>.
6623let ExeDomain = SSEPackedSingle in
6624defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6625                               avx512vl_i32_info>;
6626let ExeDomain = SSEPackedDouble in
6627defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6628                               avx512vl_i64_info>, VEX_W1X;
6629
6630//===----------------------------------------------------------------------===//
6631// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6632//===----------------------------------------------------------------------===//
6633
// Immediate-controlled shuffles. All three use opcode 0x70 and are told apart
// by their prefix base class (AVX512BIi8Base / AVX512XSIi8Base /
// AVX512XDIi8Base). They are built from the immediate-shift multiclasses:
// VPSHUFD from avx512_shift_rmi_sizes, the word shuffles from
// avx512_shift_rmi_w.
6634defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6635                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6636                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6637defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6638                                  X86PShufhw, SchedWriteShuffle>,
6639                                  EVEX, AVX512XSIi8Base;
6640defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6641                                  X86PShuflw, SchedWriteShuffle>,
6642                                  EVEX, AVX512XDIi8Base;
6643
6644//===----------------------------------------------------------------------===//
6645// AVX-512 - VPSHUFB
6646//===----------------------------------------------------------------------===//
6647
// Byte-shuffle instantiation at all three widths using avx512_var_shift
// (rr/rm forms only, no broadcast form):
//   Z           - 512-bit (v64i8), requires HasBWI.
//   Z256 / Z128 - 256/128-bit (v32i8 / v16i8), require HasVLX and HasBWI.
6648multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6649                               X86SchedWriteWidths sched> {
6650  let Predicates = [HasBWI] in
6651  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6652                              EVEX_V512;
6653
6654  let Predicates = [HasVLX, HasBWI] in {
6655  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6656                              EVEX_V256;
6657  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6658                              EVEX_V128;
6659  }
6660}
6661
// VPSHUFB: opcode 0x00, matched from the X86pshufb node, marked VEX_WIG.
6662defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6663                                  SchedWriteVarShuffle>, VEX_WIG;
6664
6665//===----------------------------------------------------------------------===//
6666// Move Low to High and High to Low packed FP Instructions
6667//===----------------------------------------------------------------------===//
6668
// EVEX-encoded register-register VMOVLHPS: selected for the X86Movlhps node
// on v4f32, operating on VR128X registers.
6669def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6670          (ins VR128X:$src1, VR128X:$src2),
6671          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6672          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6673          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// EVEX-encoded register-register VMOVHLPS: selected for the X86Movhlps node.
// It is flagged isCommutable and NotMemoryFoldable.
6674let isCommutable = 1 in
6675def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6676          (ins VR128X:$src1, VR128X:$src2),
6677          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6678          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6679          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6680
6681//===----------------------------------------------------------------------===//
6682// VMOVHPS/PD VMOVLPS Instructions
6683// All patterns were taken from the SSE implementation.
6684//===----------------------------------------------------------------------===//
6685
// Load form (rm) of the VMOVH*/VMOVL* instructions: combines register $src1
// with a 64-bit memory operand (f64mem). The pattern applies OpNode to $src1
// and to the loaded f64 placed in a vector (scalar_to_vector as v2f64, then
// bitconvert to _.VT). Callers pass null_frag as OpNode when no selection
// pattern should be generated.
6686multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6687                                  SDPatternOperator OpNode,
6688                                  X86VectorVTInfo _> {
6689  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6690  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6691                  (ins _.RC:$src1, f64mem:$src2),
6692                  !strconcat(OpcodeStr,
6693                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6694                  [(set _.RC:$dst,
6695                     (OpNode _.RC:$src1,
6696                       (_.VT (bitconvert
6697                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6698                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6699}
6700
6701// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6702// SSE1. And MOVLPS pattern is even more complex.
// The PS variants therefore pass null_frag; the PD variants are matched from
// X86Unpckl (VMOVHPD) and X86Movsd (VMOVLPD). The high forms use opcode 0x16
// and the low forms 0x12; the PD variants additionally carry VEX_W.
6703defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6704                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6705defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6706                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6707defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6708                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6709defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6710                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6711
// Additional load patterns: also select VMOVHPD / VMOVLPD when the second
// operand of X86Unpckl / X86Movsd is an X86vzload64 of the address.
6712let Predicates = [HasAVX512] in {
6713  // VMOVHPD patterns
6714  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6715            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6716
6717  // VMOVLPD patterns
6718  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6719            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6720}
6721
// Store forms of VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD (f64mem destination), all
// scheduled as WriteFStore. The PS forms have empty pattern lists, so each is
// preceded by a brace-less `let mayStore = 1, hasSideEffects = 0 in`, which
// applies only to the single def that follows it. The PD forms carry store
// patterns instead.
6722let SchedRW = [WriteFStore] in {
6723let mayStore = 1, hasSideEffects = 0 in
6724def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6725                       (ins f64mem:$dst, VR128X:$src),
6726                       "vmovhps\t{$src, $dst|$dst, $src}",
6727                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of (X86Unpckh $src, $src).
6728def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6729                       (ins f64mem:$dst, VR128X:$src),
6730                       "vmovhpd\t{$src, $dst|$dst, $src}",
6731                       [(store (f64 (extractelt
6732                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6733                                     (iPTR 0))), addr:$dst)]>,
6734                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6735let mayStore = 1, hasSideEffects = 0 in
6736def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6737                       (ins f64mem:$dst, VR128X:$src),
6738                       "vmovlps\t{$src, $dst|$dst, $src}",
6739                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of $src.
6740def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6741                       (ins f64mem:$dst, VR128X:$src),
6742                       "vmovlpd\t{$src, $dst|$dst, $src}",
6743                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6744                                     (iPTR 0))), addr:$dst)]>,
6745                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6746} // SchedRW
6747
// Extra store pattern: a store of element 0 of (X86VPermilpi $src, 1) is also
// selected as VMOVHPDZ128mr.
6748let Predicates = [HasAVX512] in {
6749  // VMOVHPD patterns
6750  def : Pat<(store (f64 (extractelt
6751                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6752                           (iPTR 0))), addr:$dst),
6753           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6754}
6755//===----------------------------------------------------------------------===//
6756// FMA - Fused Multiply Operations
6757//
6758
// Packed FMA, "213" operand order: register (r), memory (m) and
// broadcast-memory (mb) forms built with AVX512_maskable_fma. $src1 is tied
// to $dst, and the patterns pass the operands to the node in the order
// ($src2, $src1, $src3).
//   OpNode     - node for the first pattern argument of AVX512_maskable_fma.
//   MaskOpNode - node for the second pattern argument.
// NOTE(review): the names suggest unmasked vs. masked selection, and the
// trailing integer arguments (1, 1 / 1, 0) are further flags; confirm both
// against the AVX512_maskable_fma definition.
6759multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6760                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6761                               X86VectorVTInfo _> {
6762  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6763      Uses = [MXCSR], mayRaiseFPException = 1 in {
6764  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6765          (ins _.RC:$src2, _.RC:$src3),
6766          OpcodeStr, "$src3, $src2", "$src2, $src3",
6767          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6768          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6769          EVEX_4V, Sched<[sched]>;
6770
  // $src3 loaded from memory through _.LdFrag.
6771  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6772          (ins _.RC:$src2, _.MemOp:$src3),
6773          OpcodeStr, "$src3, $src2", "$src2, $src3",
6774          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6775          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6776          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6777
  // $src3 is a scalar memory operand read through _.BroadcastLdFrag (EVEX_B).
6778  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6779            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6780            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6781            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6782            (OpNode _.RC:$src2,
6783             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6784            (MaskOpNode _.RC:$src2,
6785             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6786            EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6787  }
6788}
6789
// Packed FMA, "213" operand order, register form with an explicit rounding
// operand (rb): takes an extra AVX512RC:$rc input that is passed to OpNode as
// an i32 target immediate, and carries EVEX_B and EVEX_RC. The same OpNode
// is used for both pattern arguments of AVX512_maskable_fma. The `let` here
// sets Uses = [MXCSR] but does not set mayRaiseFPException.
6790multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6791                                 X86FoldableSchedWrite sched,
6792                                 X86VectorVTInfo _> {
6793  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6794      Uses = [MXCSR] in
6795  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6796          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6797          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6798          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6799          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6800          EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6801}
6802
// Instantiates the 213-form packed FMA at all three vector widths.
//   Z           - 512-bit: r/m/mb forms plus the rounding form (OpNodeRnd);
//                 requires `prd`.
//   Z256 / Z128 - 256/128-bit: r/m/mb forms only (no rounding form); require
//                 HasVLX and `prd`.
//   prd         - base feature predicate, HasAVX512 unless overridden.
6803multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6804                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6805                                   X86SchedWriteWidths sched,
6806                                   AVX512VLVectorVTInfo _,
6807                                   Predicate prd = HasAVX512> {
6808  let Predicates = [prd] in {
6809    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6810                                      sched.ZMM, _.info512>,
6811                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6812                                        _.info512>,
6813                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6814  }
6815  let Predicates = [HasVLX, prd] in {
6816    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6817                                    sched.YMM, _.info256>,
6818                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6819    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6820                                    sched.XMM, _.info128>,
6821                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6822  }
6823}
6824
// Instantiates the 213-form packed FMA for the three FP element types, all
// with the same opcode and SchedWriteFMA:
//   PH - f16, requires HasFP16, T_MAP6PD.
//   PS - f32, default predicate, T8PD.
//   PD - f64, default predicate, T8PD with VEX_W.
6825multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6826                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6827    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6828                                      OpNodeRnd, SchedWriteFMA,
6829                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6830    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6831                                      OpNodeRnd, SchedWriteFMA,
6832                                      avx512vl_f32_info>, T8PD;
6833    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6834                                      OpNodeRnd, SchedWriteFMA,
6835                                      avx512vl_f64_info>, T8PD, VEX_W;
6836}
6837
// Packed 213-form instruction families. Arguments are, in order: opcode byte,
// mnemonic stem (the ph/ps/pd suffix is appended by avx512_fma3p_213_f),
// OpNode, MaskOpNode and OpNodeRnd.
6838defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6839                                       fma, X86FmaddRnd>;
6840defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6841                                       X86Fmsub, X86FmsubRnd>;
6842defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6843                                       X86Fmaddsub, X86FmaddsubRnd>;
6844defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6845                                       X86Fmsubadd, X86FmsubaddRnd>;
6846defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6847                                       X86Fnmadd, X86FnmaddRnd>;
6848defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6849                                       X86Fnmsub, X86FnmsubRnd>;
6850
6851
// Non-rounding forms of a packed 231 FMA for one vector type `_`:
//   r  - register source ($src3 is _.RC)
//   m  - full-vector memory source (_.MemOp, loaded through _.LdFrag)
//   mb - broadcast memory source (_.ScalarMemOp, loaded through
//        _.BroadcastLdFrag)
// $src1 is tied to $dst and is the last node operand in every pattern here.
// OpNode is used in the first pattern argument and MaskOpNode in the second;
// how AVX512_maskable_fma consumes them, and the meaning of the trailing 1/0
// flags, is defined outside this chunk.
6852multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6853                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6854                               X86VectorVTInfo _> {
6855  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6856      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form: no OpNode pattern is supplied (null_frag), only the
  // MaskOpNode one.
6857  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6858          (ins _.RC:$src2, _.RC:$src3),
6859          OpcodeStr, "$src3, $src2", "$src2, $src3",
6860          (null_frag),
6861          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6862          EVEX_4V, Sched<[sched]>;
6863
  // Memory form: the load is the second node operand.
6864  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6865          (ins _.RC:$src2, _.MemOp:$src3),
6866          OpcodeStr, "$src3, $src2", "$src2, $src3",
6867          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6868          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6869          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6870
  // Broadcast form: same operand positions as `m`, with _.BroadcastStr
  // appended to the $src3 text in both assembly strings and EVEX_B set.
6871  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6872         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6873         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6874         "$src2, ${src3}"#_.BroadcastStr,
6875         (_.VT (OpNode _.RC:$src2,
6876                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6877                      _.RC:$src1)),
6878         (_.VT (MaskOpNode _.RC:$src2,
6879                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6880                           _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6881         Sched<[sched.Folded, sched.ReadAfterFold]>;
6882  }
6883}
6884
// Register form of a packed 231 FMA with an explicit rounding-control operand
// (AVX512RC:$rc), which the pattern passes to OpNode as (i32 timm:$rc).
// As in the non-rounding register form, only the second pattern argument is
// populated (the first is null_frag). This let-scope sets Uses = [MXCSR] but,
// unlike avx512_fma3p_231_rm, does not set mayRaiseFPException.
6885multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6886                                 X86FoldableSchedWrite sched,
6887                                 X86VectorVTInfo _> {
6888  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6889      Uses = [MXCSR] in
6890  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6891          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6892          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6893          (null_frag),
6894          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6895          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6896}
6897
// Expands a packed 231 FMA over the three vector widths described by the
// AVX512VLVectorVTInfo bundle `_`:
//   Z    - _.info512 with sched.ZMM, guarded by [prd]; this is the only width
//          that also pulls in avx512_fma3_231_round (the rb form).
//   Z256 - _.info256 with sched.YMM, guarded by [HasVLX, prd].
//   Z128 - _.info128 with sched.XMM, guarded by [HasVLX, prd].
// Each width sets EVEX_CD8 from its own EltSize with CD8VF. `prd` defaults to
// HasAVX512.
6898multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6899                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6900                                   X86SchedWriteWidths sched,
6901                                   AVX512VLVectorVTInfo _,
6902                                   Predicate prd = HasAVX512> {
6903  let Predicates = [prd] in {
6904    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6905                                      sched.ZMM, _.info512>,
6906                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6907                                        _.info512>,
6908                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6909  }
6910  let Predicates = [HasVLX, prd] in {
6911    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6912                                    sched.YMM, _.info256>,
6913                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6914    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6915                                    sched.XMM, _.info128>,
6916                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6917  }
6918}
6919
// Packed 231-form FMA across element types: instantiates
// avx512_fma3p_231_common once per mnemonic suffix ("ph", "ps", "pd") with the
// avx512vl_f16_info / avx512vl_f32_info / avx512vl_f64_info bundle. All three
// use SchedWriteFMA. Only PH overrides the default predicate (HasFP16) and is
// tagged T_MAP6PD; PS and PD are tagged T8PD, and PD additionally gets VEX_W.
6920multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6921                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6922    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6923                                      OpNodeRnd, SchedWriteFMA,
6924                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6925    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6926                                      OpNodeRnd, SchedWriteFMA,
6927                                      avx512vl_f32_info>, T8PD;
6928    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6929                                      OpNodeRnd, SchedWriteFMA,
6930                                      avx512vl_f64_info>, T8PD, VEX_W;
6931}
6932
// Packed 231-form instruction families. Arguments are, in order: opcode byte,
// mnemonic stem (the ph/ps/pd suffix is appended by avx512_fma3p_231_f),
// OpNode, MaskOpNode and OpNodeRnd.
6933defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6934                                       fma, X86FmaddRnd>;
6935defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6936                                       X86Fmsub, X86FmsubRnd>;
6937defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6938                                       X86Fmaddsub, X86FmaddsubRnd>;
6939defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6940                                       X86Fmsubadd, X86FmsubaddRnd>;
6941defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6942                                       X86Fnmadd, X86FnmaddRnd>;
6943defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6944                                       X86Fnmsub, X86FnmsubRnd>;
6945
// Non-rounding forms of a packed 132 FMA for one vector type `_`:
//   r  - register source; node operands are ($src1, $src3, $src2)
//   m  - full-vector memory source; node operands are (load $src3, $src1, $src2)
//   mb - broadcast memory source; same operand positions as `m`
// $src1 is tied to $dst. OpNode is used in the first pattern argument and
// MaskOpNode in the second; how AVX512_maskable_fma consumes them, and the
// meaning of the trailing 1/0 flags, is defined outside this chunk.
6946multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6947                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6948                               X86VectorVTInfo _> {
6949  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6950      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form: no OpNode pattern is supplied (null_frag), only the
  // MaskOpNode one.
6951  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6952          (ins _.RC:$src2, _.RC:$src3),
6953          OpcodeStr, "$src3, $src2", "$src2, $src3",
6954          (null_frag),
6955          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6956          EVEX_4V, Sched<[sched]>;
6957
6958  // The pattern is written in 312 order so that the load sits in a different
6959  // position than in the 213 and 231 patterns; this helps tablegen's
  // duplicate pattern detection.
6960  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6961          (ins _.RC:$src2, _.MemOp:$src3),
6962          OpcodeStr, "$src3, $src2", "$src2, $src3",
6963          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6964          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6965          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6966
6967  // The pattern is written in 312 order so that the load sits in a different
6968  // position than in the 213 and 231 patterns; this helps tablegen's
  // duplicate pattern detection.
6969  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6970         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6971         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6972         "$src2, ${src3}"#_.BroadcastStr,
6973         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6974                       _.RC:$src1, _.RC:$src2)),
6975         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6976                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6977         EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6978  }
6979}
6980
// Register form of a packed 132 FMA with an explicit rounding-control operand
// (AVX512RC:$rc), which the pattern passes to OpNode as (i32 timm:$rc). Node
// operands are ($src1, $src3, $src2), matching the non-rounding 132 register
// form. Only the second pattern argument is populated (the first is
// null_frag). This let-scope sets Uses = [MXCSR] but, unlike
// avx512_fma3p_132_rm, does not set mayRaiseFPException.
6981multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6982                                 X86FoldableSchedWrite sched,
6983                                 X86VectorVTInfo _> {
6984  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6985      Uses = [MXCSR] in
6986  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6987          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6988          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6989          (null_frag),
6990          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6991          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6992}
6993
// Expands a packed 132 FMA over the three vector widths described by the
// AVX512VLVectorVTInfo bundle `_`:
//   Z    - _.info512 with sched.ZMM, guarded by [prd]; this is the only width
//          that also pulls in avx512_fma3_132_round (the rb form).
//   Z256 - _.info256 with sched.YMM, guarded by [HasVLX, prd].
//   Z128 - _.info128 with sched.XMM, guarded by [HasVLX, prd].
// Each width sets EVEX_CD8 from its own EltSize with CD8VF. `prd` defaults to
// HasAVX512.
6994multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6995                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6996                                   X86SchedWriteWidths sched,
6997                                   AVX512VLVectorVTInfo _,
6998                                   Predicate prd = HasAVX512> {
6999  let Predicates = [prd] in {
7000    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7001                                      sched.ZMM, _.info512>,
7002                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7003                                        _.info512>,
7004                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7005  }
7006  let Predicates = [HasVLX, prd] in {
7007    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7008                                    sched.YMM, _.info256>,
7009                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7010    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7011                                    sched.XMM, _.info128>,
7012                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7013  }
7014}
7015
// Packed 132-form FMA across element types: instantiates
// avx512_fma3p_132_common once per mnemonic suffix ("ph", "ps", "pd") with the
// avx512vl_f16_info / avx512vl_f32_info / avx512vl_f64_info bundle. All three
// use SchedWriteFMA. Only PH overrides the default predicate (HasFP16) and is
// tagged T_MAP6PD; PS and PD are tagged T8PD, and PD additionally gets VEX_W.
7016multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7017                              SDNode MaskOpNode, SDNode OpNodeRnd > {
7018    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
7019                                      OpNodeRnd, SchedWriteFMA,
7020                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
7021    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7022                                      OpNodeRnd, SchedWriteFMA,
7023                                      avx512vl_f32_info>, T8PD;
7024    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7025                                      OpNodeRnd, SchedWriteFMA,
7026                                      avx512vl_f64_info>, T8PD, VEX_W;
7027}
7028
// Packed 132-form instruction families. Arguments are, in order: opcode byte,
// mnemonic stem (the ph/ps/pd suffix is appended by avx512_fma3p_132_f),
// OpNode, MaskOpNode and OpNodeRnd.
7029defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7030                                       fma, X86FmaddRnd>;
7031defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7032                                       X86Fmsub, X86FmsubRnd>;
7033defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7034                                       X86Fmaddsub, X86FmaddsubRnd>;
7035defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7036                                       X86Fmsubadd, X86FmsubaddRnd>;
7037defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7038                                       X86Fnmadd, X86FnmaddRnd>;
7039defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7040                                       X86Fnmsub, X86FnmsubRnd>;
7041
7042// Scalar FMA
// Defines the six records of one scalar FMA form for vector-info `_`:
//   r_Int / m_Int / rb_Int - operate on _.RC and are built through
//                            AVX512_maskable_3src_scalar with no pattern
//                            (null_frag).
//   r / m / rb             - isCodeGenOnly records on _.FRC whose patterns are
//                            supplied by the caller as RHS_r / RHS_m / RHS_b.
// When MaskOnlyReg is set, `r` and `rb` get an empty pattern list; `m` always
// keeps RHS_m. All records tie $src1 to $dst.
7043multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7044                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7045let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7046  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7047          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7048          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7049          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7050
  // No pattern is attached, so mayLoad is stated explicitly.
7051  let mayLoad = 1 in
7052  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7053          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7054          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7055          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7056
  // Rounding-control form: lists Uses = [MXCSR] instead of SIMD_EXC.
7057  let Uses = [MXCSR] in
7058  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7059         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7060         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7061         EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7062
7063  let isCodeGenOnly = 1, isCommutable = 1 in {
7064    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7065                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7066                     !strconcat(OpcodeStr,
7067                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7068                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
7069    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
7070                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7071                    !strconcat(OpcodeStr,
7072                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7073                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7074
7075    let Uses = [MXCSR] in
7076    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7077                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7078                     !strconcat(OpcodeStr,
7079                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7080                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7081                     Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
7082  }// isCodeGenOnly = 1
7083}// Constraints = "$src1 = $dst"
7084}
7085
// Instantiates avx512_fma3s_common for the 213, 231 and 132 forms of one
// scalar FMA. Record names are NAME#<form>#SUFF#Z and mnemonics are
// OpcodeStr#<form>#_.Suffix. For each form the three dags are, in order, the
// register, memory and rounding patterns for the _.FRC records. The final
// argument is MaskOnlyReg: 0 for 213, 1 for 231 and 132, so only the 213 form
// keeps its register and rounding patterns.
7086multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7087                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7088                            X86VectorVTInfo _, string SUFF> {
7089  let ExeDomain = _.ExeDomain in {
7090  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7091                // Operands for intrinsic are in 123 order to preserve passthru
7092                // semantics.
7093                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7094                         _.FRC:$src3))),
7095                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7096                         (_.ScalarLdFrag addr:$src3)))),
7097                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7098                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
7099
7100  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7101                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7102                                          _.FRC:$src1))),
7103                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7104                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7105                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7106                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
7107
7108  // The memory pattern is written in 312 order so that the load sits in a
7109  // different position than in the 213 and 231 patterns; this helps tablegen's
  // duplicate pattern detection.
7110  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7111                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7112                         _.FRC:$src2))),
7113                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7114                                 _.FRC:$src1, _.FRC:$src2))),
7115                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7116                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
7117  }
7118}
7119
// Instantiates avx512_fma3s_all for the three scalar element types:
//   "SS" - f32x_info, EVEX_CD8<32, CD8VT1>, T8PD          under [HasAVX512]
//   "SD" - f64x_info, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD   under [HasAVX512]
//   "SH" - f16x_info, EVEX_CD8<16, CD8VT1>, T_MAP6PD      under [HasFP16]
// All three are VEX_LIG. The inner defm is named NAME, so the records produced
// by avx512_fma3s_all are distinguished only by the SUFF string.
7120multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7121                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7122  let Predicates = [HasAVX512] in {
7123    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7124                                 OpNodeRnd, f32x_info, "SS">,
7125                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
7126    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7127                                 OpNodeRnd, f64x_info, "SD">,
7128                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
7129  }
7130  let Predicates = [HasFP16] in {
7131    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7132                                 OpNodeRnd, f16x_info, "SH">,
7133                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
7134  }
7135}
7136
// Scalar FMA instruction families. Arguments are, in order: the opcode bytes
// for the 213, 231 and 132 forms, the mnemonic stem, OpNode and OpNodeRnd.
7137defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7138defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7139defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7140defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
7141
// Selection patterns that map "scalar FMA whose result is merged into element
// 0 of $src1 by Move" onto the *_Int instruction records named
// Prefix#<form>#Suffix#"Z"#<variant>.
//
// Parameters:
//   Op       - node matched by the unmasked patterns.
//   MaskedOp - node matched underneath X86selects_mask.
//   RndOp    - node matched by the rounding patterns; takes a trailing
//              (i32 timm:$rc).
//   Prefix, Suffix - pieces of the instruction record name.
//   Move     - node that merges the scalar result into $src1.
//   _        - vector-info for the 128-bit type.
//   ZeroFP   - leaf matched as the false value of the select in the
//              zero-masking (kz) patterns.
//   prd      - predicate guarding all patterns (default HasAVX512).
//
// Operand roles per form, where elt0 is (extractelt $src1, 0):
//   213: (Op $src2, elt0, $src3)
//   231: (Op $src2, $src3, elt0)
//   132: (Op elt0, $src3, $src2)
// Variants: Zr_/Zm_/Zrb_ pick the register, memory or rounding form; the
// Int / Intk / Intkz endings pick unmasked, merge-masked (select falls back to
// elt0) or zero-masked (select falls back to ZeroFP).
7142multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7143                                      SDNode RndOp, string Prefix,
7144                                      string Suffix, SDNode Move,
7145                                      X86VectorVTInfo _, PatLeaf ZeroFP,
7146                                      Predicate prd = HasAVX512> {
7147  let Predicates = [prd] in {
    // Unmasked, 213 register form.
7148    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7149                (Op _.FRC:$src2,
7150                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7151                    _.FRC:$src3))))),
7152              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7153               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7154               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7155
    // Unmasked, 231 register form.
7156    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7157                (Op _.FRC:$src2, _.FRC:$src3,
7158                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7159              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7160               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7161               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7162
    // Unmasked, 213 memory form.
7163    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7164                (Op _.FRC:$src2,
7165                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7166                    (_.ScalarLdFrag addr:$src3)))))),
7167              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7168               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7169               addr:$src3)>;
7170
    // Unmasked, 132 memory form.
7171    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7172                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7173                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7174              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7175               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7176               addr:$src3)>;
7177
    // Unmasked, 231 memory form.
7178    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7179                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7180                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7181              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7182               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7183               addr:$src3)>;
7184
    // Merge-masked, 213 register form.
7185    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7186               (X86selects_mask VK1WM:$mask,
7187                (MaskedOp _.FRC:$src2,
7188                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7189                    _.FRC:$src3),
7190                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7191              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7192               VR128X:$src1, VK1WM:$mask,
7193               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7194               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7195
    // Merge-masked, 213 memory form.
7196    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7197               (X86selects_mask VK1WM:$mask,
7198                (MaskedOp _.FRC:$src2,
7199                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7200                    (_.ScalarLdFrag addr:$src3)),
7201                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7202              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7203               VR128X:$src1, VK1WM:$mask,
7204               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7205
    // Merge-masked, 132 memory form.
7206    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7207               (X86selects_mask VK1WM:$mask,
7208                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7209                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7210                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7211              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7212               VR128X:$src1, VK1WM:$mask,
7213               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7214
    // Merge-masked, 231 register form.
7215    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7216               (X86selects_mask VK1WM:$mask,
7217                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7218                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7219                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7220              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7221               VR128X:$src1, VK1WM:$mask,
7222               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7223               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7224
    // Merge-masked, 231 memory form.
7225    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7226               (X86selects_mask VK1WM:$mask,
7227                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7228                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7229                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7230              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7231               VR128X:$src1, VK1WM:$mask,
7232               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7233
    // Zero-masked, 213 register form.
7234    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7235               (X86selects_mask VK1WM:$mask,
7236                (MaskedOp _.FRC:$src2,
7237                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7238                          _.FRC:$src3),
7239                (_.EltVT ZeroFP)))))),
7240              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7241               VR128X:$src1, VK1WM:$mask,
7242               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7243               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7244
    // Zero-masked, 231 register form.
7245    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7246               (X86selects_mask VK1WM:$mask,
7247                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7248                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7249                (_.EltVT ZeroFP)))))),
7250              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7251               VR128X:$src1, VK1WM:$mask,
7252               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7253               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7254
    // Zero-masked, 213 memory form.
7255    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7256               (X86selects_mask VK1WM:$mask,
7257                (MaskedOp _.FRC:$src2,
7258                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7259                          (_.ScalarLdFrag addr:$src3)),
7260                (_.EltVT ZeroFP)))))),
7261              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7262               VR128X:$src1, VK1WM:$mask,
7263               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7264
    // Zero-masked, 132 memory form. The operand order must be
    // (elt0, load $src3, $src2), exactly as in the Zm_Int and Zm_Intk 132
    // patterns in this multiclass; putting the load last would give the node
    // the operand roles of the 213 form while selecting the 132 instruction.
7265    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7266               (X86selects_mask VK1WM:$mask,
7267                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7268                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7269                (_.EltVT ZeroFP)))))),
7270              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7271               VR128X:$src1, VK1WM:$mask,
7272               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7273
    // Zero-masked, 231 memory form.
7274    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7275               (X86selects_mask VK1WM:$mask,
7276                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7277                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7278                (_.EltVT ZeroFP)))))),
7279              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7280               VR128X:$src1, VK1WM:$mask,
7281               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7282
7283    // Patterns with rounding mode.
    // Unmasked, 213 rounding form.
7284    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7285                (RndOp _.FRC:$src2,
7286                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7287                       _.FRC:$src3, (i32 timm:$rc)))))),
7288              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7289               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7290               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7291
    // Unmasked, 231 rounding form.
7292    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7293                (RndOp _.FRC:$src2, _.FRC:$src3,
7294                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7295                       (i32 timm:$rc)))))),
7296              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7297               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7298               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7299
    // Merge-masked, 213 rounding form.
7300    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7301               (X86selects_mask VK1WM:$mask,
7302                (RndOp _.FRC:$src2,
7303                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7304                       _.FRC:$src3, (i32 timm:$rc)),
7305                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7306              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7307               VR128X:$src1, VK1WM:$mask,
7308               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7309               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7310
    // Merge-masked, 231 rounding form.
7311    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7312               (X86selects_mask VK1WM:$mask,
7313                (RndOp _.FRC:$src2, _.FRC:$src3,
7314                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7315                       (i32 timm:$rc)),
7316                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7317              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7318               VR128X:$src1, VK1WM:$mask,
7319               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7320               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7321
    // Zero-masked, 213 rounding form.
7322    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7323               (X86selects_mask VK1WM:$mask,
7324                (RndOp _.FRC:$src2,
7325                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7326                       _.FRC:$src3, (i32 timm:$rc)),
7327                (_.EltVT ZeroFP)))))),
7328              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7329               VR128X:$src1, VK1WM:$mask,
7330               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7331               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7332
    // Zero-masked, 231 rounding form.
7333    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7334               (X86selects_mask VK1WM:$mask,
7335                (RndOp _.FRC:$src2, _.FRC:$src3,
7336                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7337                       (i32 timm:$rc)),
7338                (_.EltVT ZeroFP)))))),
7339              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7340               VR128X:$src1, VK1WM:$mask,
7341               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7342               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7343  }
7344}
// Instantiate the scalar FMA selection patterns for each of the four
// operations (VFMADD, VFMSUB, VFNMADD, VFNMSUB) and each element type. The
// "SH" group uses X86Movsh / v8f16x_info / fp16imm0 and overrides the
// predicate with HasFP16; the "SS" and "SD" groups use X86Movss / v4f32x_info /
// fp32imm0 and X86Movsd / v2f64x_info / fp64imm0 with the default predicate.
7345defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7346                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7347defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7348                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7349defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7350                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7351defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7352                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7353
7354defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7355                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7356defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7357                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7358defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7359                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7360defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7361                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7362
7363defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7364                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7365defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7366                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7367defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7368                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7369defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7370                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7371
7372//===----------------------------------------------------------------------===//
7373// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7374//===----------------------------------------------------------------------===//
// Register (r), full-vector memory (m) and broadcast-memory (mb) forms of one
// IFMA instruction for vector type `_`, built through AVX512_maskable_3src.
// $src1 is tied to $dst by the enclosing let and is the last node operand in
// every pattern. Only `r` passes the trailing "1, 1" arguments; `m` and `mb`
// leave them at AVX512_maskable_3src's defaults (defined outside this chunk).
7375let Constraints = "$src1 = $dst" in {
7376multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7377                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7378  // NOTE: The SDNode has the multiply operands first with the add last.
7379  // This enables commuted load patterns to be autogenerated by tablegen.
7380  let ExeDomain = _.ExeDomain in {
7381  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7382          (ins _.RC:$src2, _.RC:$src3),
7383          OpcodeStr, "$src3, $src2", "$src2, $src3",
7384          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7385          T8PD, EVEX_4V, Sched<[sched]>;
7386
7387  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7388          (ins _.RC:$src2, _.MemOp:$src3),
7389          OpcodeStr, "$src3, $src2", "$src2, $src3",
7390          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7391          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7392
  // Broadcast form: _.BroadcastStr is appended to the $src3 text in both
  // assembly strings and EVEX_B is set.
7393  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7394            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7395            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7396            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7397            (OpNode _.RC:$src2,
7398                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7399                    _.RC:$src1)>,
7400            T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7401  }
7402}
7403} // Constraints = "$src1 = $dst"
7404
// Instantiates avx512_pmadd52_rm once per vector width:
//   Z    - 512-bit form, gated on HasIFMA.
//   Z256 - 256-bit form, gated on HasVLX and HasIFMA.
//   Z128 - 128-bit form, gated on HasVLX and HasIFMA.
// Each width takes its scheduling class from the matching member of `sched`,
// its type info from the matching member of `_`, and its EVEX_CD8 element
// size from that same type info.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in
  defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasIFMA] in
  defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
              EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasIFMA] in
  defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
              EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
7418
// The two IFMA instructions: opcode 0xb4 selects x86vpmadd52l and opcode 0xb5
// selects x86vpmadd52h. Both use the 64-bit-element type info, the
// SchedWriteVecIMul scheduling group, and set VEX_W.
defm VPMADD52LUQ
    : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                            SchedWriteVecIMul, avx512vl_i64_info>,
      VEX_W;
defm VPMADD52HUQ
    : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                            SchedWriteVecIMul, avx512vl_i64_info>,
      VEX_W;
7425
7426//===----------------------------------------------------------------------===//
7427// AVX-512  Scalar convert from signed/unsigned integer to float/double
7428//===----------------------------------------------------------------------===//
7429
// Non-rounding forms of a scalar integer -> FP conversion.
//
// Parameters:
//   opc       - opcode byte.
//   OpNode    - operator matched by the *_Int patterns (callers may pass
//               null_frag).
//   sched     - scheduling class; .Folded/.ReadAfterFold are used for loads.
//   SrcRC     - integer source register class.
//   DstVT     - destination vector type info (supplies RC, FRC, VT, ExeDomain).
//   x86memop  - memory operand type for the load forms.
//   ld_frag   - load fragment matched by rm_Int.
//   asm       - mnemonic used in the instruction asm strings.
//   mem       - size suffix; emitted as "{mem}" in the memory-form asm strings
//               and appended directly in the alias mnemonic.
//   _Uses     - implicit register uses (default [MXCSR]).
//   _mayRaiseFPException - value for mayRaiseFPException (default 1).
//
// Records defined:
//   rr, rm         - codegen-only, pattern-less forms on DstVT.FRC.
//   rr_Int, rm_Int - forms on DstVT.RC carrying the OpNode patterns.
//   (anonymous)    - AT&T-only alias "v" # asm # mem for rr_Int.
7430multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7431                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7432                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7433                    string mem, list<Register> _Uses = [MXCSR],
7434                    bit _mayRaiseFPException = 1> {
7435let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7436    mayRaiseFPException = _mayRaiseFPException in {
  // No patterns are attached to rr/rm, so side effects and mayLoad are stated
  // explicitly rather than derived from a pattern.
7437  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7438    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7439              (ins DstVT.FRC:$src1, SrcRC:$src),
7440              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7441              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7442    let mayLoad = 1 in
7443      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7444              (ins DstVT.FRC:$src1, x86memop:$src),
7445              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7446              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7447  } // hasSideEffects = 0
7448  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7449                (ins DstVT.RC:$src1, SrcRC:$src2),
7450                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7451                [(set DstVT.RC:$dst,
7452                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7453               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7454
7455  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7456                (ins DstVT.RC:$src1, x86memop:$src2),
7457                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7458                [(set DstVT.RC:$dst,
7459                      (OpNode (DstVT.VT DstVT.RC:$src1),
7460                               (ld_frag addr:$src2)))]>,
7461                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7462} // ExeDomain, Uses, mayRaiseFPException
  // Alias is declared outside the `let` scope above; it only maps the
  // suffixed AT&T spelling onto the register form.
7463  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7464                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7465                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7466}
7467
// Rounding-control form of a scalar integer -> FP conversion.
// Defines rrb_Int, a register-register instruction that takes an extra
// AVX512RC:$rc operand, matches (OpNode $src1, $src2, (i32 timm:$rc)), and is
// encoded with EVEX_B and EVEX_RC. Also defines an AT&T-only alias spelled
// "v" # asm # mem for it. Parameters have the same meaning as the like-named
// ones of avx512_vcvtsi.
7468multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7469                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7470                               X86VectorVTInfo DstVT, string asm,
7471                               string mem> {
  // The `let` has no braces, so it applies to rrb_Int only, not to the alias.
7472  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7473  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7474              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7475              !strconcat(asm,
7476                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7477              [(set DstVT.RC:$dst,
7478                    (OpNode (DstVT.VT DstVT.RC:$src1),
7479                             SrcRC:$src2,
7480                             (i32 timm:$rc)))]>,
7481              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7482  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7483                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7484                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7485}
7486
// Convenience wrapper that gives one instruction family both its
// rounding-control form (avx512_vcvtsi_round, matched with OpNodeRnd) and its
// regular register/memory forms (avx512_vcvtsi, matched with OpNode), and
// marks all of them VEX_LIG. Records are emitted under the caller's NAME.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME
      : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
        avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, ld_frag,
                      asm, mem>,
        VEX_LIG;
}
7496
// Signed and unsigned scalar integer -> FP conversion instructions together
// with their aliases and selection patterns, all gated on HasAVX512.
7497let Predicates = [HasAVX512] in {
// Signed conversions (opcode 0x2A). The 64-bit-source variants add VEX_W and
// use EVEX_CD8<64, ...>.
7498defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7499                                 WriteCvtI2SS, GR32,
7500                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7501                                 XS, EVEX_CD8<32, CD8VT1>;
7502defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7503                                 WriteCvtI2SS, GR64,
7504                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7505                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// The i32 -> f64 variant uses avx512_vcvtsi directly: no rounding-control
// form, no *_Int pattern (null_frag), empty Uses and mayRaiseFPException = 0.
// NOTE(review): presumably because every 32-bit integer is exactly
// representable in f64 - confirm.
7506defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7507                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7508                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7509defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7510                                 WriteCvtI2SD, GR64,
7511                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7512                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7513
// AT&T-only aliases: the suffix-less mnemonic with a memory operand maps to
// the 32-bit (i32mem) memory form.
7514def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7515              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7516def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7517              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7518
// Scalar any_sint_to_fp selection. The first instruction operand ($src1) is
// supplied as IMPLICIT_DEF. Memory-source patterns come first.
7519def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7520          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7521def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7522          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7523def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7524          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7525def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7526          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7527
// Register-source counterparts.
7528def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7529          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7530def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7531          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7532def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7533          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7534def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7535          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7536
// Unsigned conversions (opcode 0x7B); same layout as the signed group above,
// including the direct avx512_vcvtsi instantiation for i32 -> f64.
7537defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7538                                  WriteCvtI2SS, GR32,
7539                                  v4f32x_info, i32mem, loadi32,
7540                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7541defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7542                                  WriteCvtI2SS, GR64,
7543                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7544                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7545defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7546                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7547                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7548defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7549                                  WriteCvtI2SD, GR64,
7550                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7551                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7552
// AT&T-only suffix-less aliases for the unsigned memory forms (i32mem).
7553def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7554              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7555def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7556              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7557
// Scalar any_uint_to_fp selection: memory sources, then register sources.
7558def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7559          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7560def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7561          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7562def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7563          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7564def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7565          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7566
7567def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7568          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7569def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7570          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7571def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7572          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7573def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7574          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7575} // Predicates = [HasAVX512]
7576
7577//===----------------------------------------------------------------------===//
7578// AVX-512  Scalar convert from float/double to integer
7579//===----------------------------------------------------------------------===//
7580
// Scalar FP -> integer conversion forms that operate on the vector register
// class of the source (the *_Int forms).
//
// Parameters:
//   opc        - opcode byte.
//   SrcVT      - source type info (RC, VT, IntScalarMemOp, ScalarIntMemFrags,
//                ExeDomain).
//   DstVT      - destination type info (RC).
//   OpNode     - operator matched by rr_Int and rm_Int.
//   OpNodeRnd  - operator matched by rrb_Int; takes an (i32 timm:$rc) operand.
//   sched      - scheduling class.
//   asm        - mnemonic used for the instruction asm strings.
//   aliasStr   - text appended to "v" # asm to form the alias mnemonics.
//   prd        - predicate guarding the instructions (default HasAVX512).
//
// Records defined: rr_Int, rrb_Int (EVEX_B + EVEX_RC, explicit MXCSR use),
// rm_Int, and three AT&T-only aliases, one per instruction.
7581multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7582                                  X86VectorVTInfo DstVT, SDNode OpNode,
7583                                  SDNode OpNodeRnd,
7584                                  X86FoldableSchedWrite sched, string asm,
7585                                  string aliasStr, Predicate prd = HasAVX512> {
7586  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7587    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7588                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7589                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7590                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // Unlike rr_Int/rm_Int, this form does not inherit SIMD_EXC; it only
    // declares the MXCSR use.
7591    let Uses = [MXCSR] in
7592    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7593                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7594                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7595                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7596                 Sched<[sched]>;
7597    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7598                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7599                [(set DstVT.RC:$dst, (OpNode
7600                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7601                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7602  } // Predicates = [prd]
7603
  // AT&T-only aliases carrying aliasStr; they are declared outside the
  // Predicates scope above.
7604  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7605          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7606  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7607          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7608  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7609          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7610                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7611}
7612
7613// Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si / X86cvts2siRnd; unsigned
// ones use opcode 0x79 with X86cvts2usi / X86cvts2usiRnd. The 64-bit-result
// variants add VEX_W and pass "{q}" instead of "{l}" as the alias suffix.
// EVEX_CD8 follows the source element size (32 for SS, 64 for SD).
7614defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7615                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7616                                   XS, EVEX_CD8<32, CD8VT1>;
7617defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7618                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7619                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7620defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7621                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7622                                   XS, EVEX_CD8<32, CD8VT1>;
7623defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7624                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7625                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7626defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7627                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7628                                   XD, EVEX_CD8<64, CD8VT1>;
7629defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7630                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7631                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7632defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7633                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7634                                   XD, EVEX_CD8<64, CD8VT1>;
7635defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7636                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7637                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7638
// Codegen-only scalar FP -> integer conversions that work directly on the
// scalar FP register class (SrcVT.FRC) or a scalar load, rather than on the
// vector register class.
//   rr - register source, matches (OpNode SrcVT.FRC:$src).
//   rm - memory source, matches OpNode applied to SrcVT.ScalarLdFrag.
// Both are guarded by HasAVX512, use the source type's execution domain, and
// are marked isCodeGenOnly.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain,
      isCodeGenOnly = 1 in {
    def rr : AVX512<opc, MRMSrcReg,
                    (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
             EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;

    def rm : AVX512<opc, MRMSrcMem,
                    (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set DstVT.RC:$dst,
                          (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
             EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
             SIMD_EXC;
  } // Predicates = [HasAVX512], isCodeGenOnly = 1
}
7655
// Codegen-only rr/rm forms selecting lrint (32-bit result) and llrint (64-bit
// result) from f32 and f64 sources. These reuse the defm names of the *_Int
// instantiations above and add the "rr"/"rm" suffixed records to them.
7656defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7657                       lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
7658defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7659                       llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7660defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7661                       lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
7662defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7663                       llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7664
// Extra patterns for lrint producing an i64 result: the 64-bit instructions
// are instantiated with llrint, so an i64-typed lrint is mapped onto the same
// rr/rm instructions here, for both register and load sources.
7665let Predicates = [HasAVX512] in {
7666  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7667  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7668
7669  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7670  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7671}
7672
7673// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7674// which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of an
// integer -> FP conversion and selects the corresponding *_Int instruction,
// which takes $dst as its first source operand. Layout: signed f32, signed
// f64, unsigned f32, unsigned f64; within each group GR64, i64 load, GR32,
// i32 load.
7675let Predicates = [HasAVX512] in {
// Signed -> f32.
7676def : Pat<(v4f32 (X86Movss
7677                   (v4f32 VR128X:$dst),
7678                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7679          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7680
7681def : Pat<(v4f32 (X86Movss
7682                   (v4f32 VR128X:$dst),
7683                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7684          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7685
7686def : Pat<(v4f32 (X86Movss
7687                   (v4f32 VR128X:$dst),
7688                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7689          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7690
7691def : Pat<(v4f32 (X86Movss
7692                   (v4f32 VR128X:$dst),
7693                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7694          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7695
// Signed -> f64.
7696def : Pat<(v2f64 (X86Movsd
7697                   (v2f64 VR128X:$dst),
7698                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7699          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7700
7701def : Pat<(v2f64 (X86Movsd
7702                   (v2f64 VR128X:$dst),
7703                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7704          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7705
7706def : Pat<(v2f64 (X86Movsd
7707                   (v2f64 VR128X:$dst),
7708                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7709          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7710
7711def : Pat<(v2f64 (X86Movsd
7712                   (v2f64 VR128X:$dst),
7713                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7714          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7715
// Unsigned -> f32.
7716def : Pat<(v4f32 (X86Movss
7717                   (v4f32 VR128X:$dst),
7718                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7719          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7720
7721def : Pat<(v4f32 (X86Movss
7722                   (v4f32 VR128X:$dst),
7723                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7724          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7725
7726def : Pat<(v4f32 (X86Movss
7727                   (v4f32 VR128X:$dst),
7728                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7729          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7730
7731def : Pat<(v4f32 (X86Movss
7732                   (v4f32 VR128X:$dst),
7733                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7734          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7735
// Unsigned -> f64.
7736def : Pat<(v2f64 (X86Movsd
7737                   (v2f64 VR128X:$dst),
7738                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7739          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7740
7741def : Pat<(v2f64 (X86Movsd
7742                   (v2f64 VR128X:$dst),
7743                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7744          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7745
7746def : Pat<(v2f64 (X86Movsd
7747                   (v2f64 VR128X:$dst),
7748                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7749          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7750
7751def : Pat<(v2f64 (X86Movsd
7752                   (v2f64 VR128X:$dst),
7753                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7754          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7755} // Predicates = [HasAVX512]
7756
7757// Convert float/double to signed/unsigned int 32/64 with truncation
//
// Parameters:
//   opc        - opcode byte.
//   asm        - full mnemonic (used unmodified in asm strings and aliases).
//   _SrcRC     - source type info (FRC, RC, VT, memory operands/fragments).
//   _DstRC     - destination type info (RC).
//   OpNode     - operator matched by the codegen-only rr/rm forms.
//   OpNodeInt  - operator matched by rr_Int/rm_Int.
//   OpNodeSAE  - operator matched by rrb_Int, which prints "{sae}".
//   sched      - scheduling class.
//   aliasStr   - text appended to asm for the AT&T-only aliases.
//   prd        - guarding predicate (default HasAVX512).
//
// Records defined: rr, rm (isCodeGenOnly, scalar FRC / scalar load),
// rr_Int, rrb_Int (EVEX_B, explicit MXCSR use), rm_Int, plus three aliases.
7758multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7759                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7760                            SDNode OpNodeInt, SDNode OpNodeSAE,
7761                            X86FoldableSchedWrite sched, string aliasStr,
7762                            Predicate prd = HasAVX512> {
7763let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7764  let isCodeGenOnly = 1 in {
7765  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7766              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7767              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7768              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7769  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7770              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7771              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7772              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7773  }
7774
7775  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7776            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7777           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7778           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // The SAE form does not inherit SIMD_EXC; it only declares the MXCSR use.
7779  let Uses = [MXCSR] in
7780  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7781            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7782            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7783                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7784  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7785              (ins _SrcRC.IntScalarMemOp:$src),
7786              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7787              [(set _DstRC.RC:$dst,
7788                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7789              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7790} // Predicates = [prd]
7791
  // AT&T-only aliases spelled asm # aliasStr, one per *_Int instruction.
7792  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7793          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7794  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7795          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7796  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7797          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7798                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7799}
7800
// Truncating conversions to signed integers: opcode 0x2C, any_fp_to_sint for
// the scalar forms and X86cvtts2Int / X86cvtts2IntSAE for the *_Int forms.
// 64-bit-result variants add VEX_W and use "{q}" as the alias suffix.
7801defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7802                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7803                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7804defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7805                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7806                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7807defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7808                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7809                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7810defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7811                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7812                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7813
// Truncating conversions to unsigned integers: opcode 0x78, any_fp_to_uint
// and X86cvtts2UInt / X86cvtts2UIntSAE.
7814defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7815                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7816                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7817defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7818                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7819                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7820defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7821                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7822                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7823defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7824                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7825                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7826
7827//===----------------------------------------------------------------------===//
7828// AVX-512  Convert from float to double and back
7829//===----------------------------------------------------------------------===//
7830
// Base forms of a scalar FP -> FP conversion between _Src and _ (destination).
// Every record defined here uses MXCSR and may raise an FP exception.
//   rr_Int, rm_Int - maskable forms on the vector register classes; they
//                    match OpNode with $src1 as its first operand and the
//                    converted source as its second.
//   rr, rm         - codegen-only, pattern-less forms on the scalar FRC
//                    register classes.
7831let Uses = [MXCSR], mayRaiseFPException = 1 in
7832multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833                                X86VectorVTInfo _Src, SDNode OpNode,
7834                                X86FoldableSchedWrite sched> {
7835  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7836                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7837                         "$src2, $src1", "$src1, $src2",
7838                         (_.VT (OpNode (_.VT _.RC:$src1),
7839                                       (_Src.VT _Src.RC:$src2)))>,
7840                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7841  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7842                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7843                         "$src2, $src1", "$src1, $src2",
7844                         (_.VT (OpNode (_.VT _.RC:$src1),
7845                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7846                         EVEX_4V, VEX_LIG,
7847                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7848
  // No patterns here, so hasSideEffects and mayLoad are stated explicitly.
7849  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7850    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7851               (ins _.FRC:$src1, _Src.FRC:$src2),
7852               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7853               EVEX_4V, VEX_LIG, Sched<[sched]>;
7854    let mayLoad = 1 in
7855    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7856               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7857               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7858               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7859  }
7860}
7861
// Scalar FP -> FP conversion, "suppress all exceptions" (SAE) form.
// Defines rrb_Int: a maskable register-register instruction that matches
// OpNodeSAE, prints "{sae}" in its operand list, sets EVEX_B and uses MXCSR.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, X86VectorVTInfo _Src,
                                    SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in {
    defm rrb_Int
        : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                 (ins _.RC:$src1, _Src.RC:$src2),
                                 OpcodeStr,
                                 "{sae}, $src2, $src1",
                                 "$src1, $src2, {sae}",
                                 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                                  (_Src.VT _Src.RC:$src2)))>,
          EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
  } // Uses = [MXCSR]
}
7874
// Scalar FP -> FP conversion, explicit rounding-control (RC) form.
// Defines rrb_Int: a maskable register-register instruction with an extra
// AVX512RC:$rc operand that matches OpNodeRnd with (i32 timm:$rc), sets
// EVEX_B and EVEX_RC, and uses MXCSR.
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo _, X86VectorVTInfo _Src,
                                   SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in {
    defm rrb_Int
        : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc),
                                 OpcodeStr,
                                 "$rc, $src2, $src1",
                                 "$src1, $src2, $rc",
                                 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                                  (_Src.VT _Src.RC:$src2),
                                                  (i32 timm:$rc)))>,
          EVEX_4V, VEX_LIG, Sched<[sched]>, EVEX_B, EVEX_RC;
  } // Uses = [MXCSR]
}
// Narrowing scalar FP conversion: combines the base forms
// (avx512_cvt_fp_scalar, matched with OpNode) with the rounding-control form
// (avx512_cvt_fp_rc_scalar, matched with OpNodeRnd) under the "Z" suffix.
// The execution domain is fixed to SSEPackedSingle, EVEX_CD8 uses the source
// element size, and `prd` (default HasAVX512) guards every record.
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in
  defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
           avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd,
                                   sched>,
           EVEX_CD8<_src.EltSize, CD8VT1>;
}
7899
// Widening scalar FP conversion: combines the base forms
// (avx512_cvt_fp_scalar, matched with OpNode) with the SAE form
// (avx512_cvt_fp_sae_scalar, matched with OpNodeSAE) under the "Z" suffix.
// The execution domain is fixed to SSEPackedSingle, EVEX_CD8 uses the source
// element size, and `prd` (default HasAVX512) guards every record.
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src,
                                       X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in
  defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
           avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE,
                                    sched>,
           EVEX_CD8<_src.EltSize, CD8VT1>;
}
// Scalar FP <-> FP conversion instructions. Narrowing conversions use
// avx512_cvt_fp_scalar_trunc with X86frounds / X86froundsRnd; widening ones
// use avx512_cvt_fp_scalar_extend with X86fpexts / X86fpextsSAE. The source
// type info is passed before the destination type info.
// f32 <-> f64 (default predicate):
7911defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7912                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7913                                         f32x_info>, XD, VEX_W;
7914defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7915                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7916                                          f64x_info>, XS;
// Conversions involving f16 are guarded by HasFP16 and use the T_MAP5* /
// T_MAP6* opcode-map classes. They reuse the WriteCvtSD2SS / WriteCvtSS2SD
// scheduling classes.
7917defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7918                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7919                                          f16x_info, HasFP16>, T_MAP5XD, VEX_W;
7920defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7921                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7922                                          f64x_info, HasFP16>, T_MAP5XS;
7923defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7924                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7925                                          f16x_info, HasFP16>, T_MAP5PS;
7926defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7927                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7928                                          f32x_info, HasFP16>, T_MAP6PS;
7929
// Selection patterns mapping plain (any_)fpextend / (any_)fpround on
// FR16X / FR32X / FR64X values to the EVEX scalar conversion instructions.
// In every pattern the first instruction operand is an IMPLICIT_DEF of the
// result type; the converted value is the second operand.
// The load-folding (rm) patterns additionally require OptForSize.

// f32 -> f64 (register, then folded load).
7930def : Pat<(f64 (any_fpextend FR32X:$src)),
7931          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7932          Requires<[HasAVX512]>;
7933def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7934          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7935          Requires<[HasAVX512, OptForSize]>;
7936
// f64 -> f32 (register form only).
7937def : Pat<(f32 (any_fpround FR64X:$src)),
7938          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7939           Requires<[HasAVX512]>;
7940
// f16 -> f32 (register, then folded load); requires HasFP16.
7941def : Pat<(f32 (any_fpextend FR16X:$src)),
7942          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7943          Requires<[HasFP16]>;
7944def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7945          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7946          Requires<[HasFP16, OptForSize]>;
7947
// f16 -> f64 (register, then folded load); requires HasFP16.
7948def : Pat<(f64 (any_fpextend FR16X:$src)),
7949          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7950          Requires<[HasFP16]>;
7951def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7952          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7953          Requires<[HasFP16, OptForSize]>;
7954
// f32 -> f16 and f64 -> f16 (register forms only); requires HasFP16.
7955def : Pat<(f16 (any_fpround FR32X:$src)),
7956          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7957           Requires<[HasFP16]>;
7958def : Pat<(f16 (any_fpround FR64X:$src)),
7959          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7960           Requires<[HasFP16]>;
7961
// Vector-register forms: match a scalar fpround / fpextend of element 0 of
// $src whose result is inserted back into $dst via X86Movss / X86Movsd, and
// select the corresponding *_Int instruction with $dst and $src as VR128X
// operands.

// f64 element 0 of $src rounded to f32, merged into $dst with X86Movss.
7962def : Pat<(v4f32 (X86Movss
7963                   (v4f32 VR128X:$dst),
7964                   (v4f32 (scalar_to_vector
7965                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7966          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7967          Requires<[HasAVX512]>;
7968
// f32 element 0 of $src extended to f64, merged into $dst with X86Movsd.
7969def : Pat<(v2f64 (X86Movsd
7970                   (v2f64 VR128X:$dst),
7971                   (v2f64 (scalar_to_vector
7972                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7973          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7974          Requires<[HasAVX512]>;
7975
7976//===----------------------------------------------------------------------===//
7977// AVX-512  Vector convert from signed/unsigned integer to float/double
7978//          and from float/double to signed/unsigned integer
7979//===----------------------------------------------------------------------===//
7980
// Core multiclass for the packed conversions in this section. It defines
// three AVX512_maskable_cvt groups, all converting from _Src to _:
//   rr  - register source.
//   rm  - full-vector memory source using MemOp; this is the only form whose
//         mnemonic is OpcodeStr#Alias.
//   rmb - broadcast of a scalar memory operand (_Src.ScalarMemOp), tagged
//         EVEX_B and printed with the Broadcast string appended to ${src}.
// OpNode is used for the unmasked pattern; MaskOpNode is used for the two
// vselect_mask patterns (merge with $src0, or zero via _.ImmAllZerosV).
// Parameters with defaults:
//   Broadcast - broadcast suffix text, defaults to _.BroadcastStr.
//   Alias     - extra mnemonic suffix for the rm form, defaults to "".
//   MemOp     - memory operand of the rm form, defaults to _Src.MemOp.
//   MaskRC    - write-mask register class, defaults to _.KRCWM.
//   LdDAG / MaskLdDAG - unmasked / masked source dags of the rm form; they
//               default to OpNode / MaskOpNode applied to a _Src.LdFrag load.
// Every definition is created with Uses = [MXCSR], mayRaiseFPException = 1.
7981multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7982                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7983                          X86FoldableSchedWrite sched,
7984                          string Broadcast = _.BroadcastStr,
7985                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7986                          RegisterClass MaskRC = _.KRCWM,
7987                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7988                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7989let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source form.
7990  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7991                         (ins _Src.RC:$src),
7992                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7993                         (ins MaskRC:$mask, _Src.RC:$src),
7994                          OpcodeStr, "$src", "$src",
7995                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7996                         (vselect_mask MaskRC:$mask,
7997                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7998                                       _.RC:$src0),
7999                         (vselect_mask MaskRC:$mask,
8000                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8001                                       _.ImmAllZerosV)>,
8002                         EVEX, Sched<[sched]>;
8003
  // Full-vector memory source form; patterns come from LdDAG / MaskLdDAG.
8004  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8005                         (ins MemOp:$src),
8006                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8007                         (ins MaskRC:$mask, MemOp:$src),
8008                         OpcodeStr#Alias, "$src", "$src",
8009                         LdDAG,
8010                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
8011                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
8012                         EVEX, Sched<[sched.Folded]>;
8013
  // Broadcast memory source form; patterns use _Src.BroadcastLdFrag.
8014  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8015                         (ins _Src.ScalarMemOp:$src),
8016                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8017                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8018                         OpcodeStr,
8019                         "${src}"#Broadcast, "${src}"#Broadcast,
8020                         (_.VT (OpNode (_Src.VT
8021                                  (_Src.BroadcastLdFrag addr:$src))
8022                            )),
8023                         (vselect_mask MaskRC:$mask,
8024                                       (_.VT
8025                                        (MaskOpNode
8026                                         (_Src.VT
8027                                          (_Src.BroadcastLdFrag addr:$src)))),
8028                                       _.RC:$src0),
8029                         (vselect_mask MaskRC:$mask,
8030                                       (_.VT
8031                                        (MaskOpNode
8032                                         (_Src.VT
8033                                          (_Src.BroadcastLdFrag addr:$src)))),
8034                                       _.ImmAllZerosV)>,
8035                         EVEX, EVEX_B, Sched<[sched.Folded]>;
8036  }
8037}
8038// Conversion with SAE - suppress all exceptions
// Defines a single `rrb` group (AVX512_maskable, register source) whose asm
// operand string carries a literal "{sae}" and whose pattern is OpNodeSAE
// applied to the source register. The group is tagged EVEX_B.
// Only Uses = [MXCSR] is set here; mayRaiseFPException is not touched by this
// multiclass.
8039multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8040                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
8041                              X86FoldableSchedWrite sched> {
8042  let Uses = [MXCSR] in
8043  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8044                        (ins _Src.RC:$src), OpcodeStr,
8045                        "{sae}, $src", "$src, {sae}",
8046                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8047                        EVEX, EVEX_B, Sched<[sched]>;
8048}
8049
8050// Conversion with rounding control (RC)
// Defines a single `rrb` group (AVX512_maskable, register source) that takes
// an additional AVX512RC:$rc operand. The operand is printed alongside $src
// and passed to OpNodeRnd as an (i32 timm). The group is tagged EVEX_B and
// EVEX_RC. Only Uses = [MXCSR] is set here.
8051multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8052                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8053                         X86FoldableSchedWrite sched> {
8054  let Uses = [MXCSR] in
8055  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8056                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8057                        "$rc, $src", "$src, $rc",
8058                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8059                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8060}
8061
8062// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Thin wrapper that forwards every argument to avx512_vcvt_fp and overrides
// both LdDAG and MaskLdDAG with the same extending-load fragment. The fragment
// is looked up by name as "extload" # _Src.VTName, so the rm patterns match an
// extload directly instead of OpNode / MaskOpNode applied to a plain load.
8063multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8064                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
8065                                SDNode MaskOpNode,
8066                                X86FoldableSchedWrite sched,
8067                                string Broadcast = _.BroadcastStr,
8068                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8069                                RegisterClass MaskRC = _.KRCWM>
8070  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8071                   Alias, MemOp, MaskRC,
8072                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8073                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8074
8075// Extend [Float to Double, Half to Float]
// Packed FP extension across the three vector lengths. Unlike the scalar
// wrappers above, the destination info is passed before the source info.
//   Z    : _src.info256 -> _dst.info512 using any_fpextend / fpextend, plus
//          the SAE (rrb) form matched by X86vfpextSAE. Gated by `prd`.
//   Z128 : _src.info128 -> _dst.info128 using X86any_vfpext / X86vfpext, with
//          the destination's broadcast string and an f64mem memory operand.
//   Z256 : _src.info128 -> _dst.info256 using any_fpextend / fpextend.
// Z128 and Z256 additionally require HasVLX.
8076multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8077                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8078                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8079  let Predicates = [prd] in {
8080    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
8081                            any_fpextend, fpextend, sched.ZMM>,
8082             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8083                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8084  }
8085  let Predicates = [prd, HasVLX] in {
8086    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8087                               X86any_vfpext, X86vfpext, sched.XMM,
8088                               _dst.info128.BroadcastStr,
8089                               "", f64mem>, EVEX_V128;
8090    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8091                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8092  }
8093}
8094
8095// Truncate [Double to Float, Float to Half]
// Packed FP truncation across the three vector lengths. The destination info
// is passed before the source info.
//   Z    : _src.info512 -> _dst.info256 using X86any_vfpround / X86vfpround,
//          plus the rounding-control (rrb) form matched by X86vfproundRnd.
//   Z128 : _src.info128 -> _dst.info128. Instruction patterns are disabled
//          with null_frag; the explicit Pat records below supply them. Passes
//          "{x}" as Alias, f128mem as MemOp and maskRC128 as the mask class.
//   Z256 : _src.info256 -> _dst.info128 using X86any_vfpround / X86vfpround,
//          passes "{y}" as Alias.
// bcast128 / loadVT128 / maskRC128 parameterise the hand-written Z128
// patterns and default to the matching _src.info128 fields.
8096multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8097                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8098                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8099                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8100                            PatFrag loadVT128 = _src.info128.LdFrag,
8101                            RegisterClass maskRC128 = _src.info128.KRCWM> {
8102  let Predicates = [prd] in {
8103    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8104                            X86any_vfpround, X86vfpround, sched.ZMM>,
8105             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8106                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8107  }
8108  let Predicates = [prd, HasVLX] in {
8109    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8110                               null_frag, null_frag, sched.XMM,
8111                               _src.info128.BroadcastStr, "{x}",
8112                               f128mem, maskRC128>, EVEX_V128;
8113    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8114                               X86any_vfpround, X86vfpround,
8115                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8116
8117    // Special patterns to allow use of X86vmfpround for masking. Instruction
8118    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked (rrk), zero-masked (rrkz).
8119    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8120              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8121    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8122                            maskRC128:$mask),
8123              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8124    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8125                            maskRC128:$mask),
8126              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8127
    // Full-vector load source (loadVT128): rm, rmk, rmkz.
8128    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8129              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8130    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8131                            maskRC128:$mask),
8132              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8133    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8134                            maskRC128:$mask),
8135              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8136
    // Broadcast load source (bcast128): rmb, rmbk, rmbkz.
8137    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8138              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8139    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8140                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8141              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8142    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8143                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
8144              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8145  }
8146
  // Aliases giving the Z128 register and broadcast forms an explicit "x"
  // mnemonic suffix (unmasked, {mask}, {mask}{z}). Each alias passes "att" as
  // its last argument.
  // NOTE(review): the mask class (VK2WM), the {1to2} string and f64mem are
  // spelled out literally here rather than derived from _src / maskRC128, and
  // these aliases sit outside the `let Predicates` scopes above. This
  // multiclass is also instantiated with avx512vl_f32_info as the source
  // (VCVTPS2PHX) - confirm the literals are intended for that case too.
8147  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8148                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8149  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8150                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8151                  VK2WM:$mask, VR128X:$src), 0, "att">;
8152  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8153                  "$dst {${mask}} {z}, $src}",
8154                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8155                  VK2WM:$mask, VR128X:$src), 0, "att">;
8156  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8157                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8158  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8159                  "$dst {${mask}}, ${src}{1to2}}",
8160                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8161                  VK2WM:$mask, f64mem:$src), 0, "att">;
8162  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8163                  "$dst {${mask}} {z}, ${src}{1to2}}",
8164                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8165                  VK2WM:$mask, f64mem:$src), 0, "att">;
8166
  // Same set of aliases for the Z256 forms, using the "y" suffix, VK4WM and
  // {1to4}.
8167  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8168                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8169  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8170                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8171                  VK4WM:$mask, VR256X:$src), 0, "att">;
8172  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8173                  "$dst {${mask}} {z}, $src}",
8174                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8175                  VK4WM:$mask, VR256X:$src), 0, "att">;
8176  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8177                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8178  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8179                  "$dst {${mask}}, ${src}{1to4}}",
8180                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8181                  VK4WM:$mask, f64mem:$src), 0, "att">;
8182  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8183                  "$dst {${mask}} {z}, ${src}{1to4}}",
8184                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8185                  VK4WM:$mask, f64mem:$src), 0, "att">;
8186}
8187
// Packed double <-> single conversions. Both share opcode 0x5A and differ in
// prefix class (PD with VEX_W vs. PS) and in the EVEX_CD8 setting
// (<64, CD8VF> for the truncate, <32, CD8VH> for the extend).
8188defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8189                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8190                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
8191defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8192                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8193                                   PS, EVEX_CD8<32, CD8VH>;
8194
8195// Extend Half to Double
// All three widths use v8f16x_info as the source type info.
//   Z    : -> v8f64_info via any_fpextend / fpextend, plus the SAE (rrb) form.
//          An extra Pat selects the Zrm form for (v8f64 (extloadv8f16 ...)).
//   Z128 : -> v2f64x_info via X86any_vfpext / X86vfpext, "{1to2}" broadcast
//          string, f32mem memory operand.
//   Z256 : -> v4f64x_info via X86any_vfpext / X86vfpext, "{1to4}" broadcast
//          string, f64mem memory operand.
// Z requires HasFP16; Z128 and Z256 require HasFP16 and HasVLX.
8196multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8197                            X86SchedWriteWidths sched> {
8198  let Predicates = [HasFP16] in {
8199    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8200                                  any_fpextend, fpextend, sched.ZMM>,
8201             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8202                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8203    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8204                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8205  }
8206  let Predicates = [HasFP16, HasVLX] in {
8207    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8208                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8209                                     f32mem>, EVEX_V128;
8210    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8211                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8212                                     f64mem>, EVEX_V256;
8213  }
8214}
8215
8216// Truncate Double to Half
// All three widths produce v8f16x_info.
//   Z    : from v8f64_info via X86any_vfpround / X86vfpround, "{1to8}"
//          broadcast string, "{z}" Alias, plus the rounding-control (rrb) form.
//   Z128 : from v2f64x_info, patterns disabled with null_frag, "{1to2}",
//          "{x}" Alias, f128mem, VK2WM.
//   Z256 : from v4f64x_info, patterns disabled with null_frag, "{1to4}",
//          "{y}" Alias, f256mem, VK4WM.
// No Pat records are defined inside this multiclass for the null_frag'd
// Z128 / Z256 forms.
// Z requires HasFP16; Z128 and Z256 require HasFP16 and HasVLX.
8217multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8218  let Predicates = [HasFP16] in {
8219    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8220                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8221             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8222                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8223  }
8224  let Predicates = [HasFP16, HasVLX] in {
8225    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8226                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8227                               VK2WM>, EVEX_V128;
8228    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8229                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8230                               VK4WM>, EVEX_V256;
8231  }
  // "x"-suffixed aliases for the Z128 register and broadcast forms (unmasked,
  // {mask}, {mask}{z}). Each alias passes "att" as its last argument. These
  // aliases are outside the `let Predicates` scopes above.
8232  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8233                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8234                  VR128X:$src), 0, "att">;
8235  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8236                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8237                  VK2WM:$mask, VR128X:$src), 0, "att">;
8238  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8239                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8240                  VK2WM:$mask, VR128X:$src), 0, "att">;
8241  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8242                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8243                  i64mem:$src), 0, "att">;
8244  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8245                  "$dst {${mask}}, ${src}{1to2}}",
8246                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8247                  VK2WM:$mask, i64mem:$src), 0, "att">;
8248  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8249                  "$dst {${mask}} {z}, ${src}{1to2}}",
8250                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8251                  VK2WM:$mask, i64mem:$src), 0, "att">;
8252
  // "y"-suffixed aliases for the Z256 forms (VK4WM, {1to4}).
8253  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8254                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8255                  VR256X:$src), 0, "att">;
8256  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8257                  "$dst {${mask}}, $src}",
8258                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8259                  VK4WM:$mask, VR256X:$src), 0, "att">;
8260  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8261                  "$dst {${mask}} {z}, $src}",
8262                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8263                  VK4WM:$mask, VR256X:$src), 0, "att">;
8264  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8265                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8266                  i64mem:$src), 0, "att">;
8267  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8268                  "$dst {${mask}}, ${src}{1to4}}",
8269                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8270                  VK4WM:$mask, i64mem:$src), 0, "att">;
8271  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8272                  "$dst {${mask}} {z}, ${src}{1to4}}",
8273                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8274                  VK4WM:$mask, i64mem:$src), 0, "att">;
8275
  // "z"-suffixed aliases for the 512-bit source forms (VK8WM, {1to8}).
8276  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8277                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8278                  VR512:$src), 0, "att">;
8279  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8280                  "$dst {${mask}}, $src}",
8281                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8282                  VK8WM:$mask, VR512:$src), 0, "att">;
8283  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8284                  "$dst {${mask}} {z}, $src}",
8285                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8286                  VK8WM:$mask, VR512:$src), 0, "att">;
8287  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8288                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8289                  i64mem:$src), 0, "att">;
8290  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8291                  "$dst {${mask}}, ${src}{1to8}}",
8292                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8293                  VK8WM:$mask, i64mem:$src), 0, "att">;
8294  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8295                  "$dst {${mask}} {z}, ${src}{1to8}}",
8296                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8297                  VK8WM:$mask, i64mem:$src), 0, "att">;
8298}
8299
// Packed half-precision conversion instantiations.
// VCVTPS2PHX / VCVTPH2PSX reuse the generic trunc / extend multiclasses with
// HasFP16 as the predicate; VCVTPD2PH / VCVTPH2PD use the dedicated
// half <-> double multiclasses defined above. The scheduling classes are the
// same SchedWriteCvtPD2PS / SchedWriteCvtPS2PD used for the pd <-> ps forms.
8300defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8301                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8302                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8303defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8304                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8305                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
8306defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8307                                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8308defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8309                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8310
// Hand-written selection patterns for the VCVTPD2PHZ256 and VCVTPD2PHZ128
// instructions. For each width there are three source kinds (register, full
// vector load, 64-bit broadcast load) and for each source kind three records:
// unmasked (X86any_vfpround), merge-masked and zero-masked (X86vmfpround with
// $src0 or ImmAllZerosV as the pass-through operand).
8311let Predicates = [HasFP16, HasVLX] in {
8312  // Special patterns to allow use of X86vmfpround for masking. Instruction
8313  // patterns have been disabled with null_frag.
  // --- 256-bit source (v4f64), mask class VK4WM ---
8314  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8315            (VCVTPD2PHZ256rr VR256X:$src)>;
  // NOTE(review): this is the only X86vmfpround pattern in the block that
  // wraps the node in an explicit (v8f16 ...) result type.
8316  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8317                          VK4WM:$mask)),
8318            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8319  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8320                          VK4WM:$mask),
8321            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8322
8323  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8324            (VCVTPD2PHZ256rm addr:$src)>;
8325  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8326                          VK4WM:$mask),
8327            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8328  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8329                          VK4WM:$mask),
8330            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8331
8332  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8333            (VCVTPD2PHZ256rmb addr:$src)>;
8334  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8335                          (v8f16 VR128X:$src0), VK4WM:$mask),
8336            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8337  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8338                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8339            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8340
  // --- 128-bit source (v2f64), mask class VK2WM ---
8341  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8342            (VCVTPD2PHZ128rr VR128X:$src)>;
8343  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8344                          VK2WM:$mask),
8345            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8346  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8347                          VK2WM:$mask),
8348            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8349
8350  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8351            (VCVTPD2PHZ128rm addr:$src)>;
8352  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8353                          VK2WM:$mask),
8354            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8355  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8356                          VK2WM:$mask),
8357            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8358
8359  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8360            (VCVTPD2PHZ128rmb addr:$src)>;
8361  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8362                          (v8f16 VR128X:$src0), VK2WM:$mask),
8363            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8364  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8365                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8366            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8367}
8368
8369// Convert Signed/Unsigned Doubleword to Double
// The enclosing `let` sets Uses to an empty register list and
// mayRaiseFPException to 0 for this multiclass.
// NOTE(review): presumably this is meant to override the
// Uses = [MXCSR] / mayRaiseFPException = 1 settings applied inside
// avx512_vcvt_fp (see "No rounding in this op" below) - confirm the `let`
// precedence.
//   Z    : v8i32x_info -> v8f64_info, requires HasAVX512.
//   Z128 : v4i32x_info -> v2f64x_info using OpNode128 / MaskOpNode128, with an
//          i64mem memory operand, VK2WM mask class and "{1to2}" broadcast
//          string. Its rm patterns match a loadi64 placed into a v2i64 with
//          scalar_to_vector and reinterpreted through bc_v4i32.
//   Z256 : v4i32x_info -> v4f64x_info.
// Z128 and Z256 require HasVLX.
8370let Uses = []<Register>, mayRaiseFPException = 0 in
8371multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8372                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8373                           SDNode MaskOpNode128,
8374                           X86SchedWriteWidths sched> {
8375  // No rounding in this op
8376  let Predicates = [HasAVX512] in
8377    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8378                            MaskOpNode, sched.ZMM>, EVEX_V512;
8379
8380  let Predicates = [HasVLX] in {
8381    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8382                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8383                               "", i64mem, VK2WM,
8384                               (v2f64 (OpNode128 (bc_v4i32
8385                                (v2i64
8386                                 (scalar_to_vector (loadi64 addr:$src)))))),
8387                               (v2f64 (MaskOpNode128 (bc_v4i32
8388                                (v2i64
8389                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8390                               EVEX_V128;
8391    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8392                               MaskOpNode, sched.YMM>, EVEX_V256;
8393  }
8394}
8395
8396// Convert Signed/Unsigned Doubleword to Float
// Source and destination have the same element count at every width.
//   Z    : v16i32_info -> v16f32_info, plus the rounding-control (rrb) form
//          matched by OpNodeRnd. Requires HasAVX512.
//   Z128 : v4i32x_info -> v4f32x_info.
//   Z256 : v8i32x_info -> v8f32x_info.
// Z128 and Z256 require HasVLX and have no rounding-control form.
8397multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8398                           SDNode MaskOpNode, SDNode OpNodeRnd,
8399                           X86SchedWriteWidths sched> {
8400  let Predicates = [HasAVX512] in
8401    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8402                            MaskOpNode, sched.ZMM>,
8403             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8404                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8405
8406  let Predicates = [HasVLX] in {
8407    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8408                               MaskOpNode, sched.XMM>, EVEX_V128;
8409    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8410                               MaskOpNode, sched.YMM>, EVEX_V256;
8411  }
8412}
8413
8414// Convert Float to Signed/Unsigned Doubleword with truncation
//   Z    : v16f32_info -> v16i32_info, plus the SAE (rrb) form matched by
//          OpNodeSAE. Requires HasAVX512.
//   Z128 : v4f32x_info -> v4i32x_info.
//   Z256 : v8f32x_info -> v8i32x_info.
// Z128 and Z256 require HasVLX and have no SAE form.
8415multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8416                            SDNode MaskOpNode,
8417                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8418  let Predicates = [HasAVX512] in {
8419    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8420                            MaskOpNode, sched.ZMM>,
8421             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8422                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8423  }
8424  let Predicates = [HasVLX] in {
8425    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8426                               MaskOpNode, sched.XMM>, EVEX_V128;
8427    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8428                               MaskOpNode, sched.YMM>, EVEX_V256;
8429  }
8430}
8431
8432// Convert Float to Signed/Unsigned Doubleword
// Same shape as the truncating variant above, but the 512-bit form is paired
// with a rounding-control (rrb) form matched by OpNodeRnd instead of an SAE
// form.
//   Z    : v16f32_info -> v16i32_info (+ RC form). Requires HasAVX512.
//   Z128 : v4f32x_info -> v4i32x_info.
//   Z256 : v8f32x_info -> v8i32x_info.
// Z128 and Z256 require HasVLX.
8433multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8434                           SDNode MaskOpNode, SDNode OpNodeRnd,
8435                           X86SchedWriteWidths sched> {
8436  let Predicates = [HasAVX512] in {
8437    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8438                            MaskOpNode, sched.ZMM>,
8439             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8440                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8441  }
8442  let Predicates = [HasVLX] in {
8443    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8444                               MaskOpNode, sched.XMM>, EVEX_V128;
8445    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8446                               MaskOpNode, sched.YMM>, EVEX_V256;
8447  }
8448}
8449
8450// Convert Double to Signed/Unsigned Doubleword with truncation
    // Z (EVEX_V512, HasAVX512) combines the avx512_vcvt_fp forms with the
    // OpNodeSAE form from avx512_vcvt_fp_sae; Z128 and Z256 are gated on HasVLX.
    // The trailing InstAlias records add explicit "x"/"y" suffixed mnemonics.
8451multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8452                            SDNode MaskOpNode, SDNode OpNodeSAE,
8453                            X86SchedWriteWidths sched> {
8454  let Predicates = [HasAVX512] in {
8455    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8456                            MaskOpNode, sched.ZMM>,
8457             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8458                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8459  }
8460  let Predicates = [HasVLX] in {
8461    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8462    // memory forms of these instructions in Asm Parser. They have the same
8463    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8464    // due to the same reason.
        // Z128 passes null_frag for both OpNode and MaskOpNode, so this defm
        // generates no selection patterns for the 128-bit forms; they are
        // matched by explicit Pat<> records elsewhere in this file.
8465    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8466                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8467                               VK2WM>, EVEX_V128;
8468    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8469                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8470  }
8471
      // Aliases mapping the "x"-suffixed mnemonic with a register or {1to2}
      // broadcast source onto the Z128 instructions. They are registered for the
      // "att" variant with 0 as the third InstAlias argument (presumably Emit = 0,
      // i.e. parse-only -- confirm against InstAlias in Target.td).
8472  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8473                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8474                  VR128X:$src), 0, "att">;
8475  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8476                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8477                  VK2WM:$mask, VR128X:$src), 0, "att">;
8478  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8479                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8480                  VK2WM:$mask, VR128X:$src), 0, "att">;
8481  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8482                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8483                  f64mem:$src), 0, "att">;
8484  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8485                  "$dst {${mask}}, ${src}{1to2}}",
8486                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8487                  VK2WM:$mask, f64mem:$src), 0, "att">;
8488  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8489                  "$dst {${mask}} {z}, ${src}{1to2}}",
8490                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8491                  VK2WM:$mask, f64mem:$src), 0, "att">;
8492
      // Same set of aliases for the "y"-suffixed mnemonic, targeting the Z256
      // instructions (VR256X source, VK4WM mask, {1to4} broadcast).
8493  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8494                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8495                  VR256X:$src), 0, "att">;
8496  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8497                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8498                  VK4WM:$mask, VR256X:$src), 0, "att">;
8499  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8500                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8501                  VK4WM:$mask, VR256X:$src), 0, "att">;
8502  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8503                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8504                  f64mem:$src), 0, "att">;
8505  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8506                  "$dst {${mask}}, ${src}{1to4}}",
8507                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8508                  VK4WM:$mask, f64mem:$src), 0, "att">;
8509  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8510                  "$dst {${mask}} {z}, ${src}{1to4}}",
8511                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8512                  VK4WM:$mask, f64mem:$src), 0, "att">;
8513}
8514
8515// Convert Double to Signed/Unsigned Doubleword
    // Z (EVEX_V512, HasAVX512) combines the avx512_vcvt_fp forms with the
    // OpNodeRnd form from avx512_vcvt_fp_rc; Z128 and Z256 are gated on HasVLX.
    // The trailing InstAlias records add explicit "x"/"y" suffixed mnemonics.
8516multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8517                           SDNode MaskOpNode, SDNode OpNodeRnd,
8518                           X86SchedWriteWidths sched> {
8519  let Predicates = [HasAVX512] in {
8520    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8521                            MaskOpNode, sched.ZMM>,
8522             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8523                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8524  }
8525  let Predicates = [HasVLX] in {
8526    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8527    // memory forms of these instructions in Asm Parser. They have the same
8528    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8529    // due to the same reason.
        // Z128 passes null_frag for both OpNode and MaskOpNode, so this defm
        // generates no selection patterns for the 128-bit forms; they are
        // matched by explicit Pat<> records elsewhere in this file.
8530    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8531                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8532                               VK2WM>, EVEX_V128;
8533    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8534                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8535  }
8536
      // Aliases mapping the "x"-suffixed mnemonic with a register or {1to2}
      // broadcast source onto the Z128 instructions. They are registered for the
      // "att" variant with 0 as the third InstAlias argument (presumably Emit = 0,
      // i.e. parse-only -- confirm against InstAlias in Target.td).
8537  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8538                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8539  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8540                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8541                  VK2WM:$mask, VR128X:$src), 0, "att">;
8542  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8543                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8544                  VK2WM:$mask, VR128X:$src), 0, "att">;
8545  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8546                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8547                  f64mem:$src), 0, "att">;
8548  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8549                  "$dst {${mask}}, ${src}{1to2}}",
8550                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8551                  VK2WM:$mask, f64mem:$src), 0, "att">;
8552  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8553                  "$dst {${mask}} {z}, ${src}{1to2}}",
8554                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8555                  VK2WM:$mask, f64mem:$src), 0, "att">;
8556
      // Same set of aliases for the "y"-suffixed mnemonic, targeting the Z256
      // instructions (VR256X source, VK4WM mask, {1to4} broadcast).
8557  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8558                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8559  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8560                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8561                  VK4WM:$mask, VR256X:$src), 0, "att">;
8562  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8563                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8564                  VK4WM:$mask, VR256X:$src), 0, "att">;
8565  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8566                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8567                  f64mem:$src), 0, "att">;
8568  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8569                  "$dst {${mask}}, ${src}{1to4}}",
8570                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8571                  VK4WM:$mask, f64mem:$src), 0, "att">;
8572  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8573                  "$dst {${mask}} {z}, ${src}{1to4}}",
8574                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8575                  VK4WM:$mask, f64mem:$src), 0, "att">;
8576}
8577
8578// Convert Double to Signed/Unsigned Quadword
    // Z (EVEX_V512) requires HasDQI and also gets the OpNodeRnd form from
    // avx512_vcvt_fp_rc; Z128 and Z256 require both HasDQI and HasVLX.
8579multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8580                           SDNode MaskOpNode, SDNode OpNodeRnd,
8581                           X86SchedWriteWidths sched> {
8582  let Predicates = [HasDQI] in {
8583    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8584                            MaskOpNode, sched.ZMM>,
8585             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8586                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8587  }
8588  let Predicates = [HasDQI, HasVLX] in {
8589    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8590                               MaskOpNode, sched.XMM>, EVEX_V128;
8591    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8592                               MaskOpNode, sched.YMM>, EVEX_V256;
8593  }
8594}
8595
8596// Convert Double to Signed/Unsigned Quadword with truncation
    // Z (EVEX_V512) requires HasDQI; Z128 and Z256 require HasDQI and HasVLX.
    // Note: despite its name, OpNodeRnd is forwarded to avx512_vcvt_fp_sae here
    // (the instantiations in this file pass the *SAE nodes for it).
8597multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8598                            SDNode MaskOpNode, SDNode OpNodeRnd,
8599                            X86SchedWriteWidths sched> {
8600  let Predicates = [HasDQI] in {
8601    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8602                            MaskOpNode, sched.ZMM>,
8603             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8604                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8605  }
8606  let Predicates = [HasDQI, HasVLX] in {
8607    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8608                               MaskOpNode, sched.XMM>, EVEX_V128;
8609    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8610                               MaskOpNode, sched.YMM>, EVEX_V256;
8611  }
8612}
8613
8614// Convert Signed/Unsigned Quadword to Double
    // Z (EVEX_V512) requires HasDQI and also gets the OpNodeRnd form from
    // avx512_vcvt_fp_rc; Z128 and Z256 require both HasDQI and HasVLX.
8615multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8616                           SDNode MaskOpNode, SDNode OpNodeRnd,
8617                           X86SchedWriteWidths sched> {
8618  let Predicates = [HasDQI] in {
8619    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8620                            MaskOpNode, sched.ZMM>,
8621             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8622                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8623  }
8624  let Predicates = [HasDQI, HasVLX] in {
        // Both VL forms are tagged NotEVEX2VEXConvertible (presumably because
        // there is no VEX-encoded counterpart to compress to -- TODO confirm).
8625    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8626                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8627    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8628                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8629  }
8630}
8631
8632// Convert Float to Signed/Unsigned Quadword
    // Z (EVEX_V512) requires HasDQI and also gets the OpNodeRnd form from
    // avx512_vcvt_fp_rc; Z128 and Z256 require both HasDQI and HasVLX.
8633multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8634                           SDNode MaskOpNode, SDNode OpNodeRnd,
8635                           X86SchedWriteWidths sched> {
8636  let Predicates = [HasDQI] in {
8637    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8638                            MaskOpNode, sched.ZMM>,
8639             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8640                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8641  }
8642  let Predicates = [HasDQI, HasVLX] in {
8643    // Explicitly specified broadcast string, since we take only 2 elements
8644    // from v4f32x_info source
        // For the same reason the memory operand is f64mem and the load patterns
        // are given explicitly: a 64-bit scalar load (loadf64) placed in a vector
        // and bitcast to v4f32, instead of a full 128-bit vector load.
8645    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8646                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8647                               (v2i64 (OpNode (bc_v4f32
8648                                (v2f64
8649                                 (scalar_to_vector (loadf64 addr:$src)))))),
8650                               (v2i64 (MaskOpNode (bc_v4f32
8651                                (v2f64
8652                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8653                               EVEX_V128;
8654    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8655                               MaskOpNode, sched.YMM>, EVEX_V256;
8656  }
8657}
8658
8659// Convert Float to Signed/Unsigned Quadword with truncation
    // Z (EVEX_V512) requires HasDQI; Z128 and Z256 require HasDQI and HasVLX.
    // Note: despite its name, OpNodeRnd is forwarded to avx512_vcvt_fp_sae here
    // (the instantiations in this file pass the *SAE nodes for it).
8660multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8661                            SDNode MaskOpNode, SDNode OpNodeRnd,
8662                            X86SchedWriteWidths sched> {
8663  let Predicates = [HasDQI] in {
8664    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8665                            MaskOpNode, sched.ZMM>,
8666             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8667                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8668  }
8669  let Predicates = [HasDQI, HasVLX] in {
8670    // Explicitly specified broadcast string, since we take only 2 elements
8671    // from v4f32x_info source
        // For the same reason the memory operand is f64mem and the load patterns
        // are given explicitly: a 64-bit scalar load (loadf64) placed in a vector
        // and bitcast to v4f32, instead of a full 128-bit vector load.
8672    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8673                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8674                               (v2i64 (OpNode (bc_v4f32
8675                                (v2f64
8676                                 (scalar_to_vector (loadf64 addr:$src)))))),
8677                               (v2i64 (MaskOpNode (bc_v4f32
8678                                (v2f64
8679                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8680                               EVEX_V128;
8681    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8682                               MaskOpNode, sched.YMM>, EVEX_V256;
8683  }
8684}
8685
8686// Convert Signed/Unsigned Quadword to Float
8687// Also Convert Signed/Unsigned Doubleword to Half
    // Parameterized on the destination/source type families (_dst/_src) and on
    // the base predicate prd (default HasDQI). OpNode/MaskOpNode are used for the
    // Z and Z256 forms; OpNode128/OpNode128M are used only by the explicit Z128
    // patterns below; OpNodeRnd feeds avx512_vcvt_fp_rc for Z.
8688multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8689                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8690                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8691                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8692                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8693  let Predicates = [prd] in {
8694    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8695                            MaskOpNode, sched.ZMM>,
8696             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8697                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8698  }
8699  let Predicates = [prd, HasVLX] in {
8700    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8701    // memory forms of these instructions in Asm Parser. They have the same
8702    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8703    // due to the same reason.
8704    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8705                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8706                               "{x}", i128mem, _src.info128.KRCWM>,
8707                               EVEX_V128, NotEVEX2VEXConvertible;
8708    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8709                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8710                               "{y}">, EVEX_V256,
8711                               NotEVEX2VEXConvertible;
8712
8713    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8714    // patterns have been disabled with null_frag.
        // Register-source forms: unmasked, merge-masked ($src0 passthru) and
        // zero-masked (ImmAllZerosV).
8715    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8716              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8717    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8718                             _src.info128.KRCWM:$mask),
8719              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8720    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8721                             _src.info128.KRCWM:$mask),
8722              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8723
        // Full vector load (LdFrag) source forms.
8724    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8725              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8726    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8727                             _src.info128.KRCWM:$mask),
8728              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8729    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8730                             _src.info128.KRCWM:$mask),
8731              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8732
        // Broadcast-load source forms.
        // NOTE(review): X86VBroadcastld64 is hard-coded regardless of the element
        // type of _src; this looks tailored to 64-bit sources -- confirm it is
        // intended for the 32-bit-source (dq2ph) instantiations.
8733    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8734              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8735    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8736                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8737              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8738    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8739                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8740              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8741  }
8742
      // Aliases mapping the "x"-suffixed mnemonic with a register or broadcast
      // source onto the Z128 instructions. They are registered for the "att"
      // variant with 0 as the third InstAlias argument (presumably Emit = 0,
      // i.e. parse-only -- confirm against InstAlias in Target.td).
      // NOTE(review): VK2WM/VK4WM, i64mem and {1to2}/{1to4} are hard-coded in
      // these aliases, whereas the defms above derive the mask class and
      // broadcast string from _src; verify they are right for 32-bit sources.
8743  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8744                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8745                  VR128X:$src), 0, "att">;
8746  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8747                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8748                  VK2WM:$mask, VR128X:$src), 0, "att">;
8749  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8750                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8751                  VK2WM:$mask, VR128X:$src), 0, "att">;
8752  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8753                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8754                  i64mem:$src), 0, "att">;
8755  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8756                  "$dst {${mask}}, ${src}{1to2}}",
8757                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8758                  VK2WM:$mask, i64mem:$src), 0, "att">;
8759  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8760                  "$dst {${mask}} {z}, ${src}{1to2}}",
8761                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8762                  VK2WM:$mask, i64mem:$src), 0, "att">;
8763
      // Same set of aliases for the "y"-suffixed mnemonic, targeting the Z256
      // instructions (VR256X source, VK4WM mask, {1to4} broadcast).
8764  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8765                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8766                  VR256X:$src), 0, "att">;
8767  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8768                  "$dst {${mask}}, $src}",
8769                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8770                  VK4WM:$mask, VR256X:$src), 0, "att">;
8771  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8772                  "$dst {${mask}} {z}, $src}",
8773                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8774                  VK4WM:$mask, VR256X:$src), 0, "att">;
8775  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8776                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8777                  i64mem:$src), 0, "att">;
8778  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8779                  "$dst {${mask}}, ${src}{1to4}}",
8780                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8781                  VK4WM:$mask, i64mem:$src), 0, "att">;
8782  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8783                  "$dst {${mask}} {z}, ${src}{1to4}}",
8784                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8785                  VK4WM:$mask, i64mem:$src), 0, "att">;
8786}
8787
    // Instantiations of the packed doubleword <-> FP conversion instructions.
    // Each defm passes the opcode byte, the mnemonic, the selection nodes
    // (unmasked, masked, and a rounding or SAE variant where the multiclass takes
    // one) and the scheduling class. The trailing mixin classes (PS/PD/XS/XD,
    // VEX_W, EVEX_CD8<...>) carry the encoding attributes.
    // The truncating VCVTT* forms use the avx512_cvtt* multiclasses with the
    // X86cvttp2{si,ui}SAE nodes; the non-truncating forms pass *Rnd nodes.
8788defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8789                                 X86any_VSintToFP, X86VSintToFP,
8790                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8791
8792defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8793                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8794                                PS, EVEX_CD8<32, CD8VF>;
8795
8796defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8797                                 X86cvttp2si, X86cvttp2siSAE,
8798                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8799
8800defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8801                                 X86cvttp2si, X86cvttp2siSAE,
8802                                 SchedWriteCvtPD2DQ>,
8803                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8804
8805defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8806                                 X86cvttp2ui, X86cvttp2uiSAE,
8807                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8808
8809defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8810                                 X86cvttp2ui, X86cvttp2uiSAE,
8811                                 SchedWriteCvtPD2DQ>,
8812                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8813
8814defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8815                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8816                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8817
8818defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8819                                 uint_to_fp, X86VUintToFpRnd,
8820                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8821
8822defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8823                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8824                                 EVEX_CD8<32, CD8VF>;
8825
8826defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8827                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8828                                 VEX_W, EVEX_CD8<64, CD8VF>;
8829
8830defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8831                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8832                                 PS, EVEX_CD8<32, CD8VF>;
8833
8834defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8835                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8836                                 PS, EVEX_CD8<64, CD8VF>;
8837
    // Instantiations of the packed quadword <-> FP conversion instructions.
    // The multiclasses used here gate their records on HasDQI (plus HasVLX for
    // the 128/256-bit forms). The PS-source variants use EVEX_CD8<32, CD8VH>;
    // the PD- and quadword-source variants use VEX_W with EVEX_CD8<64, CD8VF>.
8838defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8839                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8840                                 PD, EVEX_CD8<64, CD8VF>;
8841
8842defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8843                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8844                                 EVEX_CD8<32, CD8VH>;
8845
8846defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8847                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8848                                 PD, EVEX_CD8<64, CD8VF>;
8849
8850defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8851                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8852                                 EVEX_CD8<32, CD8VH>;
8853
8854defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8855                                 X86cvttp2si, X86cvttp2siSAE,
8856                                 SchedWriteCvtPD2DQ>, VEX_W,
8857                                 PD, EVEX_CD8<64, CD8VF>;
8858
8859defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8860                                 X86cvttp2si, X86cvttp2siSAE,
8861                                 SchedWriteCvtPS2DQ>, PD,
8862                                 EVEX_CD8<32, CD8VH>;
8863
8864defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8865                                 X86cvttp2ui, X86cvttp2uiSAE,
8866                                 SchedWriteCvtPD2DQ>, VEX_W,
8867                                 PD, EVEX_CD8<64, CD8VF>;
8868
8869defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8870                                 X86cvttp2ui, X86cvttp2uiSAE,
8871                                 SchedWriteCvtPS2DQ>, PD,
8872                                 EVEX_CD8<32, CD8VH>;
8873
8874defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8875                            sint_to_fp, X86VSintToFpRnd,
8876                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8877
8878defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8879                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8880                            VEX_W, XS, EVEX_CD8<64, CD8VF>;
8881
    // Instantiations of avx512_cvtqq2ps_dq2ph. The doubleword -> half forms
    // (VCVT[U]DQ2PH) use the f16/i32 type families, override the predicate with
    // HasFP16 and use the T_MAP5* encoding mixins. The quadword -> float forms
    // (VCVT[U]QQ2PS) use the f32/i64 type families, keep the default predicate
    // (HasDQI) and set VEX_W.
8882defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8883                            X86any_VSintToFP, X86VMSintToFP,
8884                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8885                            SchedWriteCvtDQ2PS, HasFP16>,
8886                            T_MAP5PS, EVEX_CD8<32, CD8VF>;
8887
8888defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8889                            X86any_VUintToFP, X86VMUintToFP,
8890                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8891                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8892                            EVEX_CD8<32, CD8VF>;
8893
8894defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8895                            X86any_VSintToFP, X86VMSintToFP,
8896                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8897                            SchedWriteCvtDQ2PS>, VEX_W, PS,
8898                            EVEX_CD8<64, CD8VF>;
8899
8900defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8901                            X86any_VUintToFP, X86VMUintToFP,
8902                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8903                            SchedWriteCvtDQ2PS>, VEX_W, XD,
8904                            EVEX_CD8<64, CD8VF>;
8905
    // Explicit selection patterns for the 128-bit (v2f64 -> v4i32) forms of
    // VCVTPD2DQ, VCVTTPD2DQ, VCVTPD2UDQ and VCVTTPD2UDQ. For each instruction
    // there are three source kinds (register, loadv2f64, X86VBroadcastld64),
    // each in unmasked, merge-masked ($src0 passthru) and zero-masked
    // (ImmAllZerosV) form. The masked DAG nodes take (src, passthru, mask) while
    // the instructions take (passthru, mask, src).
8906let Predicates = [HasVLX] in {
8907  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8908  // patterns have been disabled with null_frag.
8909  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8910            (VCVTPD2DQZ128rr VR128X:$src)>;
8911  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8912                          VK2WM:$mask),
8913            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8914  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8915                          VK2WM:$mask),
8916            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8917
8918  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8919            (VCVTPD2DQZ128rm addr:$src)>;
8920  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8921                          VK2WM:$mask),
8922            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8923  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8924                          VK2WM:$mask),
8925            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8926
8927  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8928            (VCVTPD2DQZ128rmb addr:$src)>;
8929  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8930                          (v4i32 VR128X:$src0), VK2WM:$mask),
8931            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8932  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8933                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8934            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8935
8936  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8937  // patterns have been disabled with null_frag.
8938  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8939            (VCVTTPD2DQZ128rr VR128X:$src)>;
8940  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8941                          VK2WM:$mask),
8942            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8943  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8944                          VK2WM:$mask),
8945            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8946
8947  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8948            (VCVTTPD2DQZ128rm addr:$src)>;
8949  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8950                          VK2WM:$mask),
8951            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8952  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8953                          VK2WM:$mask),
8954            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8955
8956  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8957            (VCVTTPD2DQZ128rmb addr:$src)>;
8958  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8959                          (v4i32 VR128X:$src0), VK2WM:$mask),
8960            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8961  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8962                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8963            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8964
8965  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8966  // patterns have been disabled with null_frag.
8967  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8968            (VCVTPD2UDQZ128rr VR128X:$src)>;
8969  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8970                           VK2WM:$mask),
8971            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8972  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8973                           VK2WM:$mask),
8974            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8975
8976  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8977            (VCVTPD2UDQZ128rm addr:$src)>;
8978  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8979                           VK2WM:$mask),
8980            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8981  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8982                           VK2WM:$mask),
8983            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8984
8985  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8986            (VCVTPD2UDQZ128rmb addr:$src)>;
8987  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8988                           (v4i32 VR128X:$src0), VK2WM:$mask),
8989            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8990  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8991                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8992            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8993
8994  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8995  // patterns have been disabled with null_frag.
8996  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8997            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8998  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8999                          VK2WM:$mask),
9000            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9001  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9002                          VK2WM:$mask),
9003            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9004
9005  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
9006            (VCVTTPD2UDQZ128rm addr:$src)>;
9007  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9008                          VK2WM:$mask),
9009            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9010  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9011                          VK2WM:$mask),
9012            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9013
9014  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
9015            (VCVTTPD2UDQZ128rmb addr:$src)>;
9016  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9017                          (v4i32 VR128X:$src0), VK2WM:$mask),
9018            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9019  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9020                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9021            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9022}
9023
// 128-bit float -> 64-bit integer conversion patterns (need DQI and VLX).
// In every pattern the v4f32 source is a bitcast of a v2f64 X86vzload64,
// which is folded into the instruction's memory operand. Each group selects
// the unmasked (rm), merge-masked (rmk, passthru in $src0) and zero-masked
// (rmkz) memory forms.
9024let Predicates = [HasDQI, HasVLX] in {
  // X86cvtp2Int -> VCVTPS2QQZ128.
9025  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9026            (VCVTPS2QQZ128rm addr:$src)>;
9027  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9028                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9029                                 VR128X:$src0)),
9030            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9031  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9032                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9033                                 v2i64x_info.ImmAllZerosV)),
9034            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9035
  // X86cvtp2UInt -> VCVTPS2UQQZ128.
9036  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9037            (VCVTPS2UQQZ128rm addr:$src)>;
9038  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9039                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9040                                 VR128X:$src0)),
9041            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9042  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9043                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9044                                 v2i64x_info.ImmAllZerosV)),
9045            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9046
  // Truncating signed conversion -> VCVTTPS2QQZ128. The unmasked pattern
  // matches X86any_cvttp2si; the masked patterns match X86cvttp2si.
9047  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9048            (VCVTTPS2QQZ128rm addr:$src)>;
9049  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9050                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9051                                 VR128X:$src0)),
9052            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9053  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9054                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9055                                 v2i64x_info.ImmAllZerosV)),
9056            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9057
  // Truncating unsigned conversion -> VCVTTPS2UQQZ128. The unmasked pattern
  // matches X86any_cvttp2ui; the masked patterns match X86cvttp2ui.
9058  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9059            (VCVTTPS2UQQZ128rm addr:$src)>;
9060  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9061                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9062                                 VR128X:$src0)),
9063            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9064  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9065                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9066                                 v2i64x_info.ImmAllZerosV)),
9067            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9068}
9069
// 128-bit 32-bit integer -> double conversion patterns (need VLX). The v4i32
// source is a bitcast of a v2i64 X86vzload64, folded into the memory operand.
// Unmasked patterns match the X86any_* node; the vselect_mask patterns match
// the plain X86VSintToFP / X86VUintToFP nodes.
9070let Predicates = [HasVLX] in {
  // Signed source -> VCVTDQ2PDZ128 (rm / rmk / rmkz).
9071  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9072            (VCVTDQ2PDZ128rm addr:$src)>;
9073  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9074                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9075                                 VR128X:$src0)),
9076            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9077  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9078                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9079                                 v2f64x_info.ImmAllZerosV)),
9080            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9081
  // Unsigned source -> VCVTUDQ2PDZ128 (rm / rmk / rmkz).
9082  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9083            (VCVTUDQ2PDZ128rm addr:$src)>;
9084  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9085                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9086                                 VR128X:$src0)),
9087            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9088  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9089                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9090                                 v2f64x_info.ImmAllZerosV)),
9091            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9092}
9093
9094//===----------------------------------------------------------------------===//
9095// Half precision conversion instructions
9096//===----------------------------------------------------------------------===//
9097
// vcvtph2ps (opcode 0x13): register (rr) and memory (rm) forms.
//   _dest / _src : vector type info of the result and of the source operand.
//   x86memop     : memory operand used by the rm form.
//   ld_dag       : DAG matched as the source value in the rm patterns.
//   sched        : scheduling info; rm uses sched.Folded.
// Both forms are marked as reading MXCSR and as possibly raising an FP
// exception. Each AVX512_maskable_split receives one fragment built on
// X86any_cvtph2ps and one built on X86cvtph2ps.
// NOTE(review): presumably the first fragment is the unmasked pattern and the
// second the masked one - confirm against AVX512_maskable_split.
9098let Uses = [MXCSR], mayRaiseFPException = 1 in
9099multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9100                           X86MemOperand x86memop, dag ld_dag,
9101                           X86FoldableSchedWrite sched> {
9102  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
9103                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9104                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9105                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
9106                            T8PD, Sched<[sched]>;
9107  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9108                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9109                            (X86any_cvtph2ps (_src.VT ld_dag)),
9110                            (X86cvtph2ps (_src.VT ld_dag))>,
9111                            T8PD, Sched<[sched.Folded]>;
9112}
9113
// vcvtph2ps {sae} register form (rrb), matched from X86cvtph2psSAE and
// encoded with EVEX_B. Only Uses = [MXCSR] is set here; unlike
// avx512_cvtph2ps, mayRaiseFPException is not set in this multiclass.
9114multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9115                               X86FoldableSchedWrite sched> {
9116  let Uses = [MXCSR] in
9117  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9118                             (ins _src.RC:$src), "vcvtph2ps",
9119                             "{sae}, $src", "$src, {sae}",
9120                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9121                             T8PD, EVEX_B, Sched<[sched]>;
9122}
9123
// 512-bit vcvtph2ps: v16i16 source -> v16f32 result, with a 256-bit memory
// operand (f256mem) and a plain load as the memory DAG. Also instantiates the
// {sae} register form.
9124let Predicates = [HasAVX512] in
9125  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9126                                    (load addr:$src), WriteCvtPH2PSZ>,
9127                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9128                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9129
// VLX (256-bit and 128-bit) vcvtph2ps. No {sae} variant is instantiated here.
9130let Predicates = [HasVLX] in {
  // 256-bit: v8i16 source loaded through f128mem with a plain load.
9131  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9132                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9133                       EVEX_CD8<32, CD8VH>;
  // 128-bit: the memory operand is f64mem, so the memory pattern matches a
  // bitconvert of a v2i64 X86vzload64 rather than a full vector load.
9134  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9135                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
9136                       WriteCvtPH2PS>, EVEX, EVEX_V128,
9137                       EVEX_CD8<32, CD8VH>;
9138
9139  // Pattern match vcvtph2ps of a scalar i64 load.
9140  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9141              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9142            (VCVTPH2PSZ128rm addr:$src)>;
9143}
9144
// vcvtps2ph (opcode 0x1D) with an 8-bit immediate in $src2.
//   _dest / _src : vector type info of the half-precision result and of the
//                  single-precision source.
//   x86memop     : memory operand of the store forms.
//   RR / MR      : scheduling classes for the register and store forms.
// Defines rr, rrk (merge-masked), rrkz (zero-masked) with selection patterns,
// and the store forms mr / mrk without patterns.
9145multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9146                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9147let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Unmasked register form, matched from X86any_cvtps2ph.
9148  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9149             (ins _src.RC:$src1, i32u8imm:$src2),
9150             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9151             [(set _dest.RC:$dst,
9152                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9153             Sched<[RR]>;
  // Merge-masked register form: $src0 is the passthru and is tied to $dst.
  // The mask register class comes from the source type (_src.KRCWM).
9154  let Constraints = "$src0 = $dst" in
9155  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9156             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9157             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9158             [(set _dest.RC:$dst,
9159                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9160                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9161             Sched<[RR]>, EVEX_K;
  // Zero-masked register form: the passthru is _dest.ImmAllZerosV.
9162  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9163             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9164             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9165             [(set _dest.RC:$dst,
9166                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9167                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9168             Sched<[RR]>, EVEX_KZ;
  // Store forms carry no patterns ([]), so mayStore / hasSideEffects are set
  // explicitly. The unmasked store is selected by separate Pat<> definitions
  // at the instantiation sites.
9169  let hasSideEffects = 0, mayStore = 1 in {
9170    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9171               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9172               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9173               Sched<[MR]>;
    // NOTE(review): the mask operand here uses _dest.KRCWM whereas rrk/rrkz
    // use _src.KRCWM; the two differ for the 128-bit instantiation (v8i16
    // dest vs v4f32 source). Confirm whether this is intentional.
9174    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9175               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9176               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9177                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
9178  }
9179}
9180}
9181
// vcvtps2ph {sae} register form (rrb). It is built with
// AVX512_maskable_in_asm and an empty pattern list, so hasSideEffects = 0 is
// stated explicitly. The `Sched` parameter shadows the Sched<> class name
// only as the template argument inside Sched<[Sched]>.
9182multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9183                               SchedWrite Sched> {
9184  let hasSideEffects = 0, Uses = [MXCSR] in
9185  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
9186                   (outs _dest.RC:$dst),
9187                   (ins _src.RC:$src1, i32u8imm:$src2),
9188                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
9189                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
9190}
9191
// 512-bit vcvtps2ph: v16f32 source -> v16i16 result, stored through f256mem,
// plus the {sae} register form.
9192let Predicates = [HasAVX512] in {
9193  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9194                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9195                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9196                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9197
  // Fold a store of the full v16i16 conversion result into the mr form, which
  // itself is defined without a pattern.
9198  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9199            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9200}
9201
// VLX (256-bit and 128-bit source) vcvtps2ph and the store-folding patterns
// for their mr forms. Both instantiations use v8i16x_info as the result type.
9202let Predicates = [HasVLX] in {
9203  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9204                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9205                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9206  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9207                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9208                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9209
  // 128-bit source: the store form writes through f64mem, so match a store of
  // element 0 of the result viewed as v2f64 ...
9210  def : Pat<(store (f64 (extractelt
9211                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9212                         (iPTR 0))), addr:$dst),
9213            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  // ... or viewed as v2i64.
9214  def : Pat<(store (i64 (extractelt
9215                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9216                         (iPTR 0))), addr:$dst),
9217            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  // 256-bit source: store of the whole v8i16 result.
9218  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9219            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9220}
9221
9222//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Defines only the register-register {sae} form (rrb). It has no outputs and
// an empty pattern list, so hasSideEffects = 0 is set explicitly. The EFLAGS
// definition is not set here; the instantiation sites wrap the defm in
// `let Defs = [EFLAGS]`.
//   opc       : opcode byte.
//   _         : vector type info supplying the register class.
//   OpcodeStr : mnemonic.
//   d         : execution domain.
//   sched     : scheduling info, WriteFComX by default.
9223multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9224                              string OpcodeStr, Domain d,
9225                              X86FoldableSchedWrite sched = WriteFComX> {
9226  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9227  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9228                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9229                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9230}
9231
// {sae} forms of the single/double scalar compares. Opcode 0x2E is used for
// the vucomis* mnemonics and 0x2F for vcomis*; the double-precision variants
// add VEX_W and a 64-bit CD8 element size.
9232let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9233  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9234                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9235  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9236                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9237  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9238                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9239  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9240                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9241}
9242
// EVEX-encoded scalar compares built from the shared sse12_ord_cmp and
// sse12_ord_cmp_int multiclasses (defined outside this section). The
// ucomis* variants are matched from X86any_fcmp and the comis* variants from
// X86strict_fcmps. The mnemonic strings are passed without the leading "v".
// NOTE(review): presumably the shared multiclass prepends it - confirm there.
9243let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register (FR32X / FR64X) forms.
9244  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9245                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9246                                 EVEX_CD8<32, CD8VT1>;
9247  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9248                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
9249                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9250  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9251                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9252                                 EVEX_CD8<32, CD8VT1>;
9253  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9254                                 "comisd", SSEPackedDouble>, PD, EVEX,
9255                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Vector-register (VR128X) forms matched from X86ucomi / X86comi; marked
  // isCodeGenOnly.
9256  let isCodeGenOnly = 1 in {
9257    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9258                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9259                          EVEX_CD8<32, CD8VT1>;
9260    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9261                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9262                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9263
9264    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9265                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9266                          EVEX_CD8<32, CD8VT1>;
9267    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9268                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9269                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9270  }
9271}
9272
// Half-precision scalar compares (need FP16). Same structure as the
// single/double definitions: {sae} forms, scalar-register (FR16X) forms and
// isCodeGenOnly vector-register forms. All use T_MAP5PS and a 16-bit CD8
// element size, and are placed in the SSEPackedSingle domain.
9273let Defs = [EFLAGS], Predicates = [HasFP16] in {
  // {sae} register forms.
9274  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9275                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9276                                EVEX_CD8<16, CD8VT1>;
9277  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9278                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9279                                EVEX_CD8<16, CD8VT1>;
  // Scalar-register forms matched from X86any_fcmp / X86strict_fcmps.
9280  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9281                                "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9282                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9283  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9284                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9285                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
  // Vector-register forms matched from X86ucomi / X86comi.
9286  let isCodeGenOnly = 1 in {
9287    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9288                                sse_load_f16, "ucomish", SSEPackedSingle>,
9289                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9290
9291    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9292                                sse_load_f16, "comish", SSEPackedSingle>,
9293                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9294  }
9295}
9296
9297/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
/// Scalar two-source maskable forms: rr takes both operands in registers and
/// rm folds the second operand from memory through _.ScalarIntMemFrags.
/// `prd` is the predicate applied to both definitions (HasAVX512 by default).
9298multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9299                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9300                         Predicate prd = HasAVX512> {
9301  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9302  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9303                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9304                           "$src2, $src1", "$src1, $src2",
9305                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9306                           EVEX_4V, VEX_LIG, Sched<[sched]>;
9307  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9308                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9309                         "$src2, $src1", "$src1, $src2",
9310                         (OpNode (_.VT _.RC:$src1),
9311                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9312                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9313}
9314}
9315
// Scalar reciprocal / reciprocal-square-root approximations. Opcode 0x4D is
// used for the rcp variants and 0x4F for the rsqrt variants; all share the
// X86rcp14s / X86rsqrt14s nodes.
// The half-precision variants require HasFP16, use T_MAP6PD, and sit outside
// the `let Uses = [MXCSR]` block that wraps the single/double variants.
9316defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9317                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9318                               T_MAP6PD;
9319defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9320                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9321                                 EVEX_CD8<16, CD8VT1>, T_MAP6PD;
// Single/double variants: default HasAVX512 predicate, T8PD, marked as
// reading MXCSR.
9322let Uses = [MXCSR] in {
9323defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9324                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9325                               T8PD;
9326defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9327                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
9328                               T8PD;
9329defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9330                                 SchedWriteFRsqrt.Scl, f32x_info>,
9331                                 EVEX_CD8<32, CD8VT1>, T8PD;
9332defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9333                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
9334                                 EVEX_CD8<64, CD8VT1>, T8PD;
9335}
9336
9337/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Packed one-source maskable forms: r (register), m (full-vector load via
/// _.LdFrag) and mb (broadcast load via _.BroadcastLdFrag, encoded with
/// EVEX_B). All three carry T8PD here; instantiations may add further
/// encoding classes.
9338multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9339                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9340  let ExeDomain = _.ExeDomain in {
9341  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9342                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9343                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9344                         Sched<[sched]>;
9345  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9346                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9347                         (OpNode (_.VT
9348                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9349                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9350  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9351                          (ins _.ScalarMemOp:$src), OpcodeStr,
9352                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9353                          (OpNode (_.VT
9354                            (_.BroadcastLdFrag addr:$src)))>,
9355                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9356  }
9357}
9358
// Instantiates avx512_fp14_p for every vector width and element type.
// OpcodeStr is the mnemonic stem; "14ps" / "14pd" / "ph" is appended per
// element type. The defm suffixes (14PSZ, PHZ128, ...) are appended to the
// name of the outer defm that uses this multiclass.
9359multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9360                                X86SchedWriteWidths sched> {
  // 512-bit single/double. No Predicates are set for these inside this
  // multiclass.
9361  let Uses = [MXCSR] in {
9362  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9363                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9364  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9365                             v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9366  }
  // 512-bit half precision (FP16). Not wrapped in Uses = [MXCSR].
9367  let Predicates = [HasFP16] in
9368  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9369                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9370
9371  // Define only if AVX512VL feature is present.
9372  let Predicates = [HasVLX], Uses = [MXCSR] in {
9373    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9374                                  OpNode, sched.XMM, v4f32x_info>,
9375                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9376    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9377                                  OpNode, sched.YMM, v8f32x_info>,
9378                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9379    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9380                                  OpNode, sched.XMM, v2f64x_info>,
9381                                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
9382    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9383                                  OpNode, sched.YMM, v4f64x_info>,
9384                                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
9385  }
  // 128/256-bit half precision: needs both FP16 and VLX.
9386  let Predicates = [HasFP16, HasVLX] in {
9387    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9388                                OpNode, sched.XMM, v8f16x_info>,
9389                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9390    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9391                                OpNode, sched.YMM, v16f16x_info>,
9392                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9393  }
9394}
9395
// Packed approximations: opcode 0x4E with X86rsqrt14 and opcode 0x4C with
// X86rcp14, expanded over all widths and element types by
// avx512_fp14_p_vl_all.
9396defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9397defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9398
9399/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Also instantiated for the scalar vgetexp definitions in this file.
/// Scalar two-source maskable forms:
///   r  - register/register, matched from OpNode.
///   rb - register/register with {sae}, matched from OpNodeSAE (EVEX_B).
///   m  - second operand folded from memory via _.ScalarIntMemFrags.
/// r and m carry SIMD_EXC; the {sae} form rb does not.
9400multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9401                         SDNode OpNode, SDNode OpNodeSAE,
9402                         X86FoldableSchedWrite sched> {
9403  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9404  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9405                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9406                           "$src2, $src1", "$src1, $src2",
9407                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9408                           Sched<[sched]>, SIMD_EXC;
9409
9410  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9411                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9412                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9413                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9414                            EVEX_B, Sched<[sched]>;
9415
9416  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9417                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9418                         "$src2, $src1", "$src1, $src2",
9419                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9420                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9421  }
9422}
9423
// Single (SSZ) and double (SDZ) scalar instantiations of avx512_fp28_s.
// "ss" / "sd" is appended to OpcodeStr; the double variant adds VEX_W and a
// 64-bit CD8 element size.
9424multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9425                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9426  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9427                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9428  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9429                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
9430}
9431
// Half-precision scalar (SHZ) instantiation of avx512_fp28_s, guarded by
// HasFP16 and encoded with T_MAP6PD. Unlike avx512_eri_s, VEX_LIG is not
// listed here.
9432multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9433                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9434  let Predicates = [HasFP16] in
9435  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9436               EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9437}
9438
// Scalar 28-bit reciprocal (opcode 0xCB) and reciprocal square root (opcode
// 0xCD); both require the ERI feature.
9439let Predicates = [HasERI] in {
9440  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9441                               SchedWriteFRcp.Scl>;
9442  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9443                               SchedWriteFRsqrt.Scl>;
9444}
9445
// Scalar vgetexp (opcode 0x43): ss/sd variants through avx512_eri_s and the
// sh variant through avx512_vgetexpsh. This defm is outside the HasERI block;
// only the sh variant sets a predicate (HasFP16) at this level.
9446defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9447                              SchedWriteFRnd.Scl>,
9448                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9449                                  SchedWriteFRnd.Scl>;
9450/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Also instantiated for vexp2 and the packed vgetexp definitions in this
/// file. Packed one-source maskable forms: r (register), m (full-vector load
/// via _.LdFrag) and mb (broadcast load via _.BroadcastLdFrag, EVEX_B). All
/// three are marked as reading MXCSR and as possibly raising an FP exception.
9451
9452multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9453                         SDNode OpNode, X86FoldableSchedWrite sched> {
9454  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9455  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9456                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9457                         (OpNode (_.VT _.RC:$src))>,
9458                         Sched<[sched]>;
9459
9460  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9461                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9462                         (OpNode (_.VT
9463                             (bitconvert (_.LdFrag addr:$src))))>,
9464                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9465
9466  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9467                         (ins _.ScalarMemOp:$src), OpcodeStr,
9468                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9469                         (OpNode (_.VT
9470                                  (_.BroadcastLdFrag addr:$src)))>,
9471                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9472  }
9473}
9473}
// Packed {sae} register form (rb), encoded with EVEX_B. OpNode here is the
// SAE flavour of the node supplied by the caller. Only Uses = [MXCSR] is set;
// mayRaiseFPException is not set in this multiclass.
9474multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9475                         SDNode OpNode, X86FoldableSchedWrite sched> {
9476  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9477  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9478                        (ins _.RC:$src), OpcodeStr,
9479                        "{sae}, $src", "$src, {sae}",
9480                        (OpNode (_.VT _.RC:$src))>,
9481                        EVEX_B, Sched<[sched]>;
9482}
9483
// 512-bit packed single (PSZ) and double (PDZ) instantiations, each combining
// the r/m/mb forms from avx512_fp28_p with the {sae} form from
// avx512_fp28_p_sae. OpNode feeds the former and OpNodeSAE the latter.
9484multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9485                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9486   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9487              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9488              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9489   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9490              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9491              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9492}
9493
// 128-bit and 256-bit packed single/double instantiations of avx512_fp28_p.
// No {sae} forms are instantiated for these widths.
9494multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9495                                  SDNode OpNode, X86SchedWriteWidths sched> {
9496  // Define only if AVX512VL feature is present.
9497  let Predicates = [HasVLX] in {
9498    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9499                                sched.XMM>,
9500                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9501    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9502                                sched.YMM>,
9503                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9504    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9505                                sched.XMM>,
9506                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9507    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9508                                sched.YMM>,
9509                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9510  }
9511}
9512
// Half-precision packed instantiations of avx512_fp28_p, encoded with
// T_MAP6PD. The 512-bit variant (HasFP16) also gets the {sae} form; the
// 128/256-bit variants (HasFP16 + HasVLX) do not.
9513multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9514                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9515  let Predicates = [HasFP16] in
9516  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9517              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9518              T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9519  let Predicates = [HasFP16, HasVLX] in {
9520    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9521                                     EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9522    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9523                                     EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9524  }
9525}
// 512-bit packed ERI instructions (require HasERI): vrsqrt28 (0xCC),
// vrcp28 (0xCA) and vexp2 (0xC8). Only the avx512_eri (512-bit) multiclass is
// used, so no 128/256-bit variants are defined here.
9526let Predicates = [HasERI] in {
9527 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9528                            SchedWriteFRsqrt>, EVEX;
9529 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9530                            SchedWriteFRcp>, EVEX;
9531 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9532                            SchedWriteFAdd>, EVEX;
9533}
// Packed vgetexp (opcode 0x42): 512-bit ps/pd with {sae} (avx512_eri),
// half-precision variants (avx512_vgetexp_fp16) and 128/256-bit ps/pd
// variants (avx512_fp_unaryop_packed). This defm is outside the HasERI block.
9534defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9535                            SchedWriteFRnd>,
9536                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9537                                     SchedWriteFRnd>,
9538                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9539                                          SchedWriteFRnd>, EVEX;
9540
// Packed square root with an explicit rounding-control operand ($rc of class
// AVX512RC). The rb form is matched from X86fsqrtRnd with $rc as a target
// immediate and is encoded with EVEX_B and EVEX_RC. Uses/mayRaiseFPException
// are not set inside this multiclass.
9541multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9542                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9543  let ExeDomain = _.ExeDomain in
9544  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9545                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9546                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9547                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9548}
9549
// Packed sqrt in three source flavours: register (r), full-vector memory (m)
// and broadcast memory (mb). All three read MXCSR and may raise FP exceptions.
// Each AVX512_maskable_split is handed two patterns, one using any_fsqrt and
// one using fsqrt.
// NOTE(review): presumably the any_fsqrt pattern is the unmasked one and the
// fsqrt pattern the masked one -- confirm against AVX512_maskable_split.
9550multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9551                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9552  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9553  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9554                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9555                         (_.VT (any_fsqrt _.RC:$src)),
9556                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9557                         Sched<[sched]>;
9558  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9559                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9560                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9561                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9562                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: scalar memory operand, BroadcastStr appended to the
  // assembly operand, EVEX_B set.
9563  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9564                          (ins _.ScalarMemOp:$src), OpcodeStr,
9565                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9566                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9567                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9568                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9569  }
9570}
9571
// Instantiates avx512_sqrt_packed for ph/ps/pd at 512, 256 and 128 bits.
//  - ph forms need HasFP16 (plus HasVLX for the 128/256-bit forms).
//  - ps/pd 128/256-bit forms need HasVLX.
//  - PSZ/PDZ get no Predicates override inside this multiclass.
// NOTE(review): avx512_sqrt_packed already sets Uses = [MXCSR] and
// mayRaiseFPException = 1 on its defs, so the outer let here looks
// redundant -- confirm before removing.
9572let Uses = [MXCSR], mayRaiseFPException = 1 in
9573multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9574                                  X86SchedWriteSizes sched> {
9575  let Predicates = [HasFP16] in
9576  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9577                                sched.PH.ZMM, v32f16_info>,
9578                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9579  let Predicates = [HasFP16, HasVLX] in {
9580    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9581                                     sched.PH.XMM, v8f16x_info>,
9582                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9583    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9584                                     sched.PH.YMM, v16f16x_info>,
9585                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9586  }
9587  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9588                                sched.PS.ZMM, v16f32_info>,
9589                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9590  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9591                                sched.PD.ZMM, v8f64_info>,
9592                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9593  // Define only if AVX512VL feature is present.
9594  let Predicates = [HasVLX] in {
9595    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9596                                     sched.PS.XMM, v4f32x_info>,
9597                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9598    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9599                                     sched.PS.YMM, v8f32x_info>,
9600                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9601    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9602                                     sched.PD.XMM, v2f64x_info>,
9603                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9604    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9605                                     sched.PD.YMM, v4f64x_info>,
9606                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9607  }
9608}
9609
// Rounding-control (rb) packed sqrt forms. Only 512-bit variants are
// instantiated (ph/ps/pd); the ph one is gated on HasFP16. The enclosing let
// marks the defs as using MXCSR.
9610let Uses = [MXCSR] in
9611multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9612                                        X86SchedWriteSizes sched> {
9613  let Predicates = [HasFP16] in
9614  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9615                                      sched.PH.ZMM, v32f16_info>,
9616                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9617  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9618                                      sched.PS.ZMM, v16f32_info>,
9619                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9620  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9621                                      sched.PD.ZMM, v8f64_info>,
9622                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9623}
9624
// Scalar sqrt for one element type.
//   r_Int / m_Int / rb_Int : maskable forms on the vector register class
//                            (_.RC), selecting X86fsqrts / X86fsqrtRnds.
//   r / m                  : isCodeGenOnly forms on the scalar FP register
//                            class (_.FRC) with empty pattern lists; they are
//                            selected by the Pat<> records at the bottom.
// Name is the record-name prefix used to look up the r/m instructions
// (Name#Zr, Name#Zm); prd gates every def and pattern in this multiclass.
9625multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9626                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9627  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9628    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9629                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9630                         "$src2, $src1", "$src1, $src2",
9631                         (X86fsqrts (_.VT _.RC:$src1),
9632                                    (_.VT _.RC:$src2))>,
9633                         Sched<[sched]>, SIMD_EXC;
9634    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9635                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9636                         "$src2, $src1", "$src1, $src2",
9637                         (X86fsqrts (_.VT _.RC:$src1),
9638                                    (_.ScalarIntMemFrags addr:$src2))>,
9639                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    // Explicit rounding-control form: lists Uses = [MXCSR] directly instead
    // of mixing in SIMD_EXC like the forms above.
9640    let Uses = [MXCSR] in
9641    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9642                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9643                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9644                         (X86fsqrtRnds (_.VT _.RC:$src1),
9645                                     (_.VT _.RC:$src2),
9646                                     (i32 timm:$rc))>,
9647                         EVEX_B, EVEX_RC, Sched<[sched]>;
9648
9649    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9650      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9651                (ins _.FRC:$src1, _.FRC:$src2),
9652                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9653                Sched<[sched]>, SIMD_EXC;
9654      let mayLoad = 1 in
9655        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9656                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9657                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9658                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9659    }
9660  }
9661
  // Scalar any_fsqrt on a register: the first source operand is filled with
  // IMPLICIT_DEF and the real input goes in the second.
9662  let Predicates = [prd] in {
9663    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9664              (!cast<Instruction>(Name#Zr)
9665                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9666  }
9667
  // The load-folding pattern is only enabled together with OptForSize.
9668  let Predicates = [prd, OptForSize] in {
9669    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9670              (!cast<Instruction>(Name#Zm)
9671                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9672  }
9673}
9674
// Instantiates avx512_sqrt_scalar for sh/ss/sd. The Name argument is
// NAME#"SH"/"SS"/"SD", to which avx512_sqrt_scalar appends "Zr"/"Zm" when it
// looks up the codegen-only instructions. Only the sh form overrides the
// default predicate (HasFP16); ss/sd fall back to the multiclass default.
9675multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9676                                  X86SchedWriteSizes sched> {
9677  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9678                        EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9679  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9680                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9681  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9682                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9683}
9684
// VSQRT, opcode 0x51: packed forms (regular plus rounding-control) first, then
// the scalar forms, which additionally carry VEX_LIG.
9685defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9686             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9687
9688defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9689
// Scalar round-scale for one element type.
//   r_Int / m_Int : maskable forms on _.RC selecting X86RndScales.
//   rb_Int        : "{sae}" form selecting X86RndScalesSAE, tagged EVEX_B.
//   r / m         : isCodeGenOnly forms on _.FRC with empty pattern lists,
//                   selected by the Pat<> records at the bottom.
// NOTE(review): the codegen-only defs and both Pat<> groups name HasAVX512
// explicitly, whatever element type is passed in. The FP16 instantiation
// (VRNDSCALESHZ) is wrapped in HasFP16 by its caller -- confirm that
// HasAVX512 is the intended predicate for that instantiation too.
9690multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9691                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9692  let ExeDomain = _.ExeDomain in {
9693  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9694                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9695                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9696                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9697                           (i32 timm:$src3)))>,
9698                           Sched<[sched]>, SIMD_EXC;
9699
  // SAE form: lists Uses = [MXCSR] directly and does not mix in SIMD_EXC.
9700  let Uses = [MXCSR] in
9701  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9702                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9703                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9704                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9705                         (i32 timm:$src3)))>, EVEX_B,
9706                         Sched<[sched]>;
9707
9708  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9709                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9710                         OpcodeStr,
9711                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9712                         (_.VT (X86RndScales _.RC:$src1,
9713                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9714                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9715
9716  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9717    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9718               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9719               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9720               []>, Sched<[sched]>, SIMD_EXC;
9721
9722    let mayLoad = 1 in
9723      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9724                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9725                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9726                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9727  }
9728  }
9729
  // Register source: the first instruction operand is IMPLICIT_DEF and the
  // matched value becomes the second.
9730  let Predicates = [HasAVX512] in {
9731    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9732              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9733               _.FRC:$src1, timm:$src2))>;
9734  }
9735
  // The load-folding pattern is only enabled together with OptForSize.
9736  let Predicates = [HasAVX512, OptForSize] in {
9737    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9738              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9739               addr:$src1, timm:$src2))>;
9740  }
9741}
9742
// Scalar round-scale instantiations.
//   sh: opcode 0x0A, AVX512PSIi8Base + TA, gated on HasFP16; no VEX_LIG listed.
//   ss: opcode 0x0A, AVX512AIi8Base, VEX_LIG.
//   sd: opcode 0x0B, AVX512AIi8Base, VEX_LIG, VEX_W.
9743let Predicates = [HasFP16] in
9744defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9745                                           SchedWriteFRnd.Scl, f16x_info>,
9746                                           AVX512PSIi8Base, TA, EVEX_4V,
9747                                           EVEX_CD8<16, CD8VT1>;
9748
9749defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9750                                           SchedWriteFRnd.Scl, f32x_info>,
9751                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
9752                                           EVEX_CD8<32, CD8VT1>;
9753
9754defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9755                                           SchedWriteFRnd.Scl, f64x_info>,
9756                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9757                                           EVEX_CD8<64, CD8VT1>;
9758
// Patterns that fold a masked scalar operation into the masked intrinsic
// forms of an instruction. The matched shape is
//   Move(src1, scalar_to_vector(X86selects_mask(Mask,
//                                               OpNode(elt 0 of src2),
//                                               <passthru>)))
// where <passthru> is element 0 of $dst (selects "V"#OpcPrefix#r_Intk) or
// ZeroFP (selects "V"#OpcPrefix#r_Intkz). OutMask is the dag placed in the
// instruction's mask operand.
9759multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9760                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9761                                dag OutMask, Predicate BasePredicate> {
9762  let Predicates = [BasePredicate] in {
    // Merge-masking: pass-through comes from $dst.
9763    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9764               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9765               (extractelt _.VT:$dst, (iPTR 0))))),
9766              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9767               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9768
    // Zero-masking: pass-through is the ZeroFP leaf.
9769    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9770               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9771               ZeroFP))),
9772              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9773               OutMask, _.VT:$src2, _.VT:$src1)>;
9774  }
9775}
9776
// Masked scalar fsqrt for sh/ss/sd. In each case the matched mask is a GR32
// truncated to i8 and turned into v1i1 via scalar_to_vector, and the output
// mask copies $mask into the VK1WM register class. The sh form is gated on
// HasFP16, the others on HasAVX512.
9777defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9778                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9779                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9780defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9781                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9782                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9783defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9784                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9785                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9786
9787
9788//-------------------------------------------------
9789// Integer truncate and extend operations
9790//-------------------------------------------------
9791
9792// PatFrags that contain a select and a truncate op. They take operands in the
9793// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9794// either to the multiclasses.
// Each fragment takes (src, src0, mask) and expands to
// vselect_mask(mask, <truncate-op>(src), src0), with the truncate op being
// trunc, X86vtruncs or X86vtruncus respectively.
9795def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9796                           (vselect_mask node:$mask,
9797                                         (trunc node:$src), node:$src0)>;
9798def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9799                            (vselect_mask node:$mask,
9800                                          (X86vtruncs node:$src), node:$src0)>;
9801def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9802                             (vselect_mask node:$mask,
9803                                           (X86vtruncus node:$src), node:$src0)>;
9804
// Defines the five forms of one truncating move for a (SrcInfo, DestInfo)
// pair:
//   rr   : register, unmasked, selects OpNode.
//   rrk  : register, merge-masked ($src0 tied to $dst), selects MaskNode.
//   rrkz : register, zero-masked, selects MaskNode with ImmAllZerosV.
//   mr   : store to x86memop, no pattern here.
//   mrk  : masked store to x86memop, no pattern here, NotMemoryFoldable.
// The mask register class always comes from SrcInfo (SrcInfo.KRCWM).
9805multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9806                              SDPatternOperator MaskNode,
9807                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9808                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9809  let ExeDomain = DestInfo.ExeDomain in {
9810  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9811             (ins SrcInfo.RC:$src),
9812             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9813             [(set DestInfo.RC:$dst,
9814                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9815             EVEX, Sched<[sched]>;
9816  let Constraints = "$src0 = $dst" in
9817  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9818             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9819             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9820             [(set DestInfo.RC:$dst,
9821                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9822                             (DestInfo.VT DestInfo.RC:$src0),
9823                             SrcInfo.KRCWM:$mask))]>,
9824             EVEX, EVEX_K, Sched<[sched]>;
9825  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9826             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9827             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9828             [(set DestInfo.RC:$dst,
9829                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9830                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9831             EVEX, EVEX_KZ, Sched<[sched]>;
9832  }
9833
  // Store forms carry empty pattern lists, so mayStore/hasSideEffects are
  // spelled out explicitly.
9834  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9835    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9836               (ins x86memop:$dst, SrcInfo.RC:$src),
9837               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9838               EVEX, Sched<[sched.Folded]>;
9839
9840    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9841               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9842               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9843               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9844  }//mayStore = 1, hasSideEffects = 0
9845}
9846
// Selection patterns for the truncating store forms. truncFrag maps to the
// instruction named Name#SrcInfo.ZSuffix#mr and mtruncFrag (which also takes a
// mask) to Name#SrcInfo.ZSuffix#mrk.
9847multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9848                                    PatFrag truncFrag, PatFrag mtruncFrag,
9849                                    string Name> {
9850
9851  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9852            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9853                                    addr:$dst, SrcInfo.RC:$src)>;
9854
9855  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9856                        SrcInfo.KRCWM:$mask),
9857            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9858                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9859}
9860
// Instantiates avx512_trunc_common plus avx512_trunc_mr_lowering at three
// source widths. Each width gets its own selection node, mask node,
// destination info and memory operand:
//   Z128 / Z256 : Predicates = [HasVLX, prd]
//   Z           : Predicates = [prd]
// The same truncFrag/mtruncFrag pair is used for the store patterns at every
// width.
9861multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9862                        SDNode OpNode256, SDNode OpNode512,
9863                        SDPatternOperator MaskNode128,
9864                        SDPatternOperator MaskNode256,
9865                        SDPatternOperator MaskNode512,
9866                        X86FoldableSchedWrite sched,
9867                        AVX512VLVectorVTInfo VTSrcInfo,
9868                        X86VectorVTInfo DestInfoZ128,
9869                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9870                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9871                        X86MemOperand x86memopZ, PatFrag truncFrag,
9872                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9873
9874  let Predicates = [HasVLX, prd] in {
9875    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9876                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9877                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9878                                         mtruncFrag, NAME>, EVEX_V128;
9879
9880    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9881                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9882                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9883                                         mtruncFrag, NAME>, EVEX_V256;
9884  }
9885  let Predicates = [prd] in
9886    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9887                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9888                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9889                                         mtruncFrag, NAME>, EVEX_V512;
9890}
9891
// i64 -> i8 truncation. InVecNode/InVecMaskNode are used at all three widths,
// the destination is v16i8x_info at all three widths, and the memory operands
// are i16mem/i32mem/i64mem for Z128/Z256/Z.
9892multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9893                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9894                           PatFrag MaskedStoreNode, SDNode InVecNode,
9895                           SDPatternOperator InVecMaskNode> {
9896  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9897                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9898                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9899                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9900                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9901}
9902
// i64 -> i16 truncation. Z128 and Z256 use InVecNode/InVecMaskNode; only the
// 512-bit form uses OpNode/MaskNode. The destination is v8i16x_info at every
// width; memory operands are i32mem/i64mem/i128mem.
9903multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9904                           SDPatternOperator MaskNode,
9905                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9906                           PatFrag MaskedStoreNode, SDNode InVecNode,
9907                           SDPatternOperator InVecMaskNode> {
9908  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9909                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9910                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9911                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9912                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9913}
9914
// i64 -> i32 truncation. Only Z128 uses InVecNode/InVecMaskNode; Z256 and Z
// use OpNode/MaskNode. Destinations are v4i32x/v4i32x/v8i32x and memory
// operands are i64mem/i128mem/i256mem.
9915multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9916                           SDPatternOperator MaskNode,
9917                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9918                           PatFrag MaskedStoreNode, SDNode InVecNode,
9919                           SDPatternOperator InVecMaskNode> {
9920  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9921                          InVecMaskNode, MaskNode, MaskNode, sched,
9922                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9923                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9924                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9925}
9926
// i32 -> i8 truncation. Z128 and Z256 use InVecNode/InVecMaskNode; only the
// 512-bit form uses OpNode/MaskNode. The destination is v16i8x_info at every
// width; memory operands are i32mem/i64mem/i128mem.
9927multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9928                           SDPatternOperator MaskNode,
9929                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9930                           PatFrag MaskedStoreNode, SDNode InVecNode,
9931                           SDPatternOperator InVecMaskNode> {
9932  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9933                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9934                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9935                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9936                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9937}
9938
// i32 -> i16 truncation. Only Z128 uses InVecNode/InVecMaskNode; Z256 and Z
// use OpNode/MaskNode. Destinations are v8i16x/v8i16x/v16i16x and memory
// operands are i64mem/i128mem/i256mem.
9939multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9940                           SDPatternOperator MaskNode,
9941                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9942                           PatFrag MaskedStoreNode, SDNode InVecNode,
9943                           SDPatternOperator InVecMaskNode> {
9944  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9945                          InVecMaskNode, MaskNode, MaskNode, sched,
9946                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9947                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9948                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9949}
9950
// i16 -> i8 truncation. Only Z128 uses InVecNode/InVecMaskNode; Z256 and Z
// use OpNode/MaskNode. Destinations are v16i8x/v16i8x/v32i8x and memory
// operands are i64mem/i128mem/i256mem. This is the only trunc wrapper in this
// group that overrides avx512_trunc's default predicate (HasBWI).
9951multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9952                           SDPatternOperator MaskNode,
9953                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9954                           PatFrag MaskedStoreNode, SDNode InVecNode,
9955                           SDPatternOperator InVecMaskNode> {
9956  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9957                          InVecMaskNode, MaskNode, MaskNode, sched,
9958                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9959                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9960                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9961}
9962
// Truncating-move instantiations. Each source/destination pair comes in three
// flavours that differ in the nodes and store fragments passed:
//   VPMOVxy   : trunc / select_trunc, truncstorevi*, X86vtrunc / X86vmtrunc
//   VPMOVSxy  : X86vtruncs / select_truncs, truncstore_s_vi*, X86vmtruncs
//   VPMOVUSxy : X86vtruncus / select_truncus, truncstore_us_vi*, X86vmtruncus
// The qb wrapper takes no OpNode/MaskNode, so its instantiations pass only
// the X86vtrunc*-family nodes. All entries use WriteVPMOV256 for scheduling.

// i64 -> i8
9963defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9964                                  WriteVPMOV256, truncstorevi8,
9965                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9966defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9967                                  WriteVPMOV256, truncstore_s_vi8,
9968                                  masked_truncstore_s_vi8, X86vtruncs,
9969                                  X86vmtruncs>;
9970defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9971                                  WriteVPMOV256, truncstore_us_vi8,
9972                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9973
// i64 -> i16
9974defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9975                                  WriteVPMOV256, truncstorevi16,
9976                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9977defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9978                                  WriteVPMOV256, truncstore_s_vi16,
9979                                  masked_truncstore_s_vi16, X86vtruncs,
9980                                  X86vmtruncs>;
9981defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9982                                  select_truncus, WriteVPMOV256,
9983                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9984                                  X86vtruncus, X86vmtruncus>;
9985
// i64 -> i32
9986defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9987                                  WriteVPMOV256, truncstorevi32,
9988                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9989defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9990                                  WriteVPMOV256, truncstore_s_vi32,
9991                                  masked_truncstore_s_vi32, X86vtruncs,
9992                                  X86vmtruncs>;
9993defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9994                                  select_truncus, WriteVPMOV256,
9995                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9996                                  X86vtruncus, X86vmtruncus>;
9997
// i32 -> i8
9998defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9999                                  WriteVPMOV256, truncstorevi8,
10000                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10001defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
10002                                  WriteVPMOV256, truncstore_s_vi8,
10003                                  masked_truncstore_s_vi8, X86vtruncs,
10004                                  X86vmtruncs>;
10005defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
10006                                  select_truncus, WriteVPMOV256,
10007                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10008                                  X86vtruncus, X86vmtruncus>;
10009
// i32 -> i16
10010defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
10011                                  WriteVPMOV256, truncstorevi16,
10012                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10013defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
10014                                  WriteVPMOV256, truncstore_s_vi16,
10015                                  masked_truncstore_s_vi16, X86vtruncs,
10016                                  X86vmtruncs>;
10017defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10018                                  select_truncus, WriteVPMOV256,
10019                                  truncstore_us_vi16, masked_truncstore_us_vi16,
10020                                  X86vtruncus, X86vmtruncus>;
10021
// i16 -> i8
10022defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10023                                  WriteVPMOV256, truncstorevi8,
10024                                  masked_truncstorevi8, X86vtrunc,
10025                                  X86vmtrunc>;
10026defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10027                                  WriteVPMOV256, truncstore_s_vi8,
10028                                  masked_truncstore_s_vi8, X86vtruncs,
10029                                  X86vmtruncs>;
10030defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10031                                  select_truncus, WriteVPMOV256,
10032                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10033                                  X86vtruncus, X86vmtruncus>;
10034
// Without VLX, a trunc of a 256-bit source is done with the 512-bit
// instruction: the YMM source is inserted into an IMPLICIT_DEF ZMM via
// sub_ymm, the Z-form instruction is applied, and the low XMM of the result is
// extracted via sub_xmm.
10035let Predicates = [HasAVX512, NoVLX] in {
10036def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10037         (v8i16 (EXTRACT_SUBREG
10038                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10039                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
10040def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10041         (v4i32 (EXTRACT_SUBREG
10042                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10043                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
10044}
10045
// Same widening trick for v16i16 -> v16i8; the word-to-byte instruction is
// gated on HasBWI instead of HasAVX512.
10046let Predicates = [HasBWI, NoVLX] in {
10047def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10048         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10049                                            VR256X:$src, sub_ymm))), sub_xmm))>;
10050}
10051
10052// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps OpNode(src, passthru, mask) onto the masked register forms of
// InstrName: a DestInfo.RC pass-through selects "rrk", an all-zeros
// pass-through selects "rrkz".
10053multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10054                           X86VectorVTInfo DestInfo,
10055                           X86VectorVTInfo SrcInfo> {
10056  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10057                                 DestInfo.RC:$src0,
10058                                 SrcInfo.KRCWM:$mask)),
10059            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10060                                                 SrcInfo.KRCWM:$mask,
10061                                                 SrcInfo.RC:$src)>;
10062
10063  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10064                                 DestInfo.ImmAllZerosV,
10065                                 SrcInfo.KRCWM:$mask)),
10066            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10067                                                  SrcInfo.RC:$src)>;
10068}
10069
// mtrunc_lowering instantiations for the plain, signed-saturating and
// unsigned-saturating masked truncates.
// 256-bit source (v8i32 -> v8i16), requires HasVLX.
10070let Predicates = [HasVLX] in {
10071defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10072defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10073defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10074}
10075
// 512-bit sources: v16i32 -> v16i16, v16i32 -> v16i8 and v8i64 -> v8i16.
10076let Predicates = [HasAVX512] in {
10077defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10078defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10079defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10080
10081defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10082defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10083defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10084
10085defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10086defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10087defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10088}
10089
// Shared body for the vector extension instructions. Defines a
// register-source form (rr) and a memory-source form (rm) through
// AVX512_maskable, both in DestInfo's execution domain.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   sched     - scheduling class; rm uses sched.Folded
//   DestInfo  - result vector type info
//   SrcInfo   - source register type info (rr only)
//   x86memop  - memory operand for rm
//   LdFrag    - load fragment whose result is the rm pattern
//   OpNode    - extension node applied to the register source in rr
10090multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10091              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10092              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10093  let ExeDomain = DestInfo.ExeDomain in {
10094  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10095                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10096                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10097                  EVEX, Sched<[sched]>;
10098
10099  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10100                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10101                  (DestInfo.VT (LdFrag addr:$src))>,
10102                EVEX, Sched<[sched.Folded]>;
10103  }
10104}
10105
// Byte -> word extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi8" fragment.
// Z128 is defined with InVecNode (v16i8 source, v8i16 result, i64mem memory
// operand). Z256 and Z use OpNode with a source whose element count equals
// the result's.
// The 128/256-bit forms need VLX and BWI; the 512-bit form needs BWI.
10106multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
10107          SDNode OpNode, SDNode InVecNode, string ExtTy,
10108          X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
10109          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10110  let Predicates = [HasVLX, HasBWI] in {
10111    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, schedX, v8i16x_info,
10112                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10113                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10114
10115    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i16x_info,
10116                    v16i8x_info, i128mem, LdFrag, OpNode>,
10117                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10118  }
10119  let Predicates = [HasBWI] in {
10120    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v32i16_info,
10121                    v32i8x_info, i256mem, LdFrag, OpNode>,
10122                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10123  }
10124}
10125
// Byte -> dword extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi8" fragment.
// Every form takes a v16i8 register source. Z128 and Z256 use InVecNode
// (results v4i32 / v8i32; memory operands i32mem / i64mem). Z uses OpNode
// (result v16i32; i128mem).
10126multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
10127          SDNode OpNode, SDNode InVecNode, string ExtTy,
10128          X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
10129          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10130  let Predicates = [HasVLX, HasAVX512] in {
10131    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
10132                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10133                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10134
10135    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
10136                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10137                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10138  }
10139  let Predicates = [HasAVX512] in {
10140    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
10141                   v16i8x_info, i128mem, LdFrag, OpNode>,
10142                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10143  }
10144}
10145
// Byte -> qword extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi8" fragment.
// This multiclass has no OpNode parameter: all three widths take a v16i8
// register source and use InVecNode. Memory operands are i16mem, i32mem and
// i64mem respectively.
10146multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
10147                              SDNode InVecNode, string ExtTy,
10148                              X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
10149                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10150  let Predicates = [HasVLX, HasAVX512] in {
10151    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
10152                   v16i8x_info, i16mem, LdFrag, InVecNode>,
10153                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
10154
10155    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
10156                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10157                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
10158  }
10159  let Predicates = [HasAVX512] in {
10160    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
10161                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10162                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
10163  }
10164}
10165
// Word -> dword extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi16" fragment.
// Z128 uses InVecNode (v8i16 source, v4i32 result, i64mem). Z256 and Z use
// OpNode with a source whose element count equals the result's.
10166multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
10167         SDNode OpNode, SDNode InVecNode, string ExtTy,
10168         X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
10169         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10170  let Predicates = [HasVLX, HasAVX512] in {
10171    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
10172                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10173                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10174
10175    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
10176                   v8i16x_info, i128mem, LdFrag, OpNode>,
10177                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10178  }
10179  let Predicates = [HasAVX512] in {
10180    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
10181                   v16i16x_info, i256mem, LdFrag, OpNode>,
10182                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10183  }
10184}
10185
// Word -> qword extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi16" fragment.
// Every form takes a v8i16 register source. Z128 and Z256 use InVecNode
// (results v2i64 / v4i64; i32mem / i64mem). Z uses OpNode (result v8i64;
// i128mem).
10186multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
10187         SDNode OpNode, SDNode InVecNode, string ExtTy,
10188         X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
10189         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10190  let Predicates = [HasVLX, HasAVX512] in {
10191    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
10192                   v8i16x_info, i32mem, LdFrag, InVecNode>,
10193                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10194
10195    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
10196                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10197                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10198  }
10199  let Predicates = [HasAVX512] in {
10200    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
10201                   v8i16x_info, i128mem, LdFrag, OpNode>,
10202                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10203  }
10204}
10205
// Dword -> qword extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi32" fragment.
// Z128 uses InVecNode (v4i32 source, v2i64 result, i64mem). Z256 and Z use
// OpNode with a source whose element count equals the result's.
// Unlike the byte/word-source multiclasses in this file, these definitions do
// not add VEX_WIG.
10206multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10207         SDNode OpNode, SDNode InVecNode, string ExtTy,
10208         X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
10209         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10210
10211  let Predicates = [HasVLX, HasAVX512] in {
10212    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
10213                   v4i32x_info, i64mem, LdFrag, InVecNode>,
10214                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10215
10216    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
10217                   v4i32x_info, i128mem, LdFrag, OpNode>,
10218                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10219  }
10220  let Predicates = [HasAVX512] in {
10221    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
10222                   v8i32x_info, i256mem, LdFrag, OpNode>,
10223                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
10224  }
10225}
10226
// Zero-extending moves: opcodes 0x30-0x35, nodes zext / zext_invec, load
// fragment prefix "z". The BQ form takes only the in-vector node.
10227defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
10228defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
10229defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
10230defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
10231defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
10232defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
10233
// Sign-extending moves: opcodes 0x20-0x25, nodes sext / sext_invec, load
// fragment prefix "s".
10234defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
10235defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
10236defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
10237defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
10238defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
10239defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
10240
10241
10242// Patterns that we also need any extend versions of. aext_vector_inreg
10243// is currently legalized to zext_vector_inreg.
// Each pattern folds a full-width vector load feeding ExtOp into the rm form
// of the instruction named OpcPrefix # <width suffix>. OpcPrefix is the
// instruction-name prefix (instantiated below with "VPMOVSX" / "VPMOVZX").
10244multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10245  // 256-bit patterns
10246  let Predicates = [HasVLX, HasBWI] in {
10247    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10248              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10249  }
10250
10251  let Predicates = [HasVLX] in {
10252    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10253              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10254
10255    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10256              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10257  }
10258
10259  // 512-bit patterns
10260  let Predicates = [HasBWI] in {
10261    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10262              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10263  }
10264  let Predicates = [HasAVX512] in {
10265    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10266              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10267    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10268              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10269
10270    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10271              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10272
10273    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10274              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10275  }
10276}
10277
// Load-folding patterns for the in-vector extension node InVecOp, layered on
// top of the full-vector-load patterns inherited from
// AVX512_pmovx_patterns_base. Each pattern matches InVecOp applied to a
// bitcast of a narrow load that was placed in a 128-bit vector, and selects
// the rm form of OpcPrefix # <suffix>. The narrow load appears as one of:
//   - scalar_to_vector of an integer scalar load (loadi32 / loadi64),
//   - scalar_to_vector of an f64 scalar load, typed v2f64,
//   - an X86vzload32 / X86vzload64 node.
10278multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10279                                 SDNode InVecOp> :
10280    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10281  // 128-bit patterns
10282  let Predicates = [HasVLX, HasBWI] in {
10283  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10284            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10285  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10286            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10287  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10288            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10289  }
10290  let Predicates = [HasVLX] in {
10291  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10292            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10293  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10294            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10295
10296  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10297            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10298
10299  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10300            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10301  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10302            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10303  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10304            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10305
10306  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10307            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10308  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10309            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10310
10311  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10312            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10313  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10314            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10315  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10316            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10317  }
// 256-bit patterns
10318  let Predicates = [HasVLX] in {
10319  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10320            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
// The f64 load is wrapped as v2f64, like every other loadf64 pattern in this
// multiclass; a scalar_to_vector of an f64 scalar has f64 elements.
10321  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10322            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10323  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10324            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10325
10326  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10327            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10328  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10329            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10330
10331  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10332            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
// As above: v2f64 for the f64-load variant.
10333  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10334            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10335  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10336            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10337  }
10338  // 512-bit patterns
10339  let Predicates = [HasAVX512] in {
10340  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10341            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10342  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10343            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10344  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10345            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10346  }
10347}
10348
// Instantiate the load-folding patterns for the sign- and zero-extending
// instruction families.
10349defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10350defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10351
10352// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10353// ext+trunc aggressively making it impossible to legalize the DAG to this
10354// pattern directly.
// Both patterns first widen the v16i16 value to v16i32 with VPMOVZXWDZ (the
// register or the load-folding form) and then narrow to v16i8 with VPMOVDBZrr.
10355let Predicates = [HasAVX512, NoBWI] in {
10356def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10357         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10358def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10359         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10360}
10361
10362//===----------------------------------------------------------------------===//
10363// GATHER - SCATTER Operations
10364
10365// FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction (rm form) with no selection pattern.
//   _      - vector type info for the destination / pass-through register
//   memop  - memory operand carrying the vector index
//   MaskRC - mask register class; defaults to the write-mask class of _
// The mask is both an input ($mask) and an output ($mask_wb), tied together
// by the constraint string. $src1 is tied to $dst, and $dst is marked
// earlyclobber.
10366multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10367                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10368  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10369      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10370  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10371            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10372            !strconcat(OpcodeStr#_.Suffix,
10373            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10374            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10375            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10376}
10377
// Gather definitions for the type families instantiated below with 64-bit
// elements (avx512vl_f64_info / avx512vl_i64_info). For each vector width it
// defines a "d" variant (opcode dopc) and a "q" variant (opcode qopc). Both
// use the same vector info for that width and differ only in the memory
// operand. All definitions carry VEX_W. The 512-bit forms have no extra
// predicate here; the 256/128-bit forms require VLX.
10378multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10379                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10380  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10381                                      vy512xmem>, EVEX_V512, VEX_W;
10382  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10383                                      vz512mem>, EVEX_V512, VEX_W;
10384let Predicates = [HasVLX] in {
10385  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10386                              vx256xmem>, EVEX_V256, VEX_W;
10387  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10388                              vy256xmem>, EVEX_V256, VEX_W;
10389  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10390                              vx128xmem>, EVEX_V128, VEX_W;
10391  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10392                              vx128xmem>, EVEX_V128, VEX_W;
10393}
10394}
10395
// Gather definitions for the type families instantiated below with 32-bit
// elements (avx512vl_f32_info / avx512vl_i32_info). No VEX_W is added.
// The "q" variants use a narrower data vector than the encoded vector length:
// info256 with EVEX_V512, info128 with EVEX_V256. The 128-bit "q" form keeps
// info128 but overrides the mask class with VK2WM and uses vx64xmem.
10396multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10397                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10398  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10399                                       EVEX_V512;
10400  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10401                                       EVEX_V512;
10402let Predicates = [HasVLX] in {
10403  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10404                                          vy256xmem>, EVEX_V256;
10405  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10406                                          vy128xmem>, EVEX_V256;
10407  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10408                                          vx128xmem>, EVEX_V128;
10409  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10410                                          vx64xmem, VK2WM>, EVEX_V128;
10411}
10412}
10413
10414
// Floating-point gathers: opcodes 0x92 ("d" variants) / 0x93 ("q" variants).
10415defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10416               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10417
// Integer gathers: opcodes 0x90 ("d" variants) / 0x91 ("q" variants).
10418defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10419                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10420
// One scatter instruction (mr form) with no selection pattern.
//   _      - vector type info for the source data register
//   memop  - memory operand carrying the vector index
//   MaskRC - mask register class; defaults to the write-mask class of _
// The mask is an input ($mask) tied to the only output ($mask_wb).
10421multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10422                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10423
// The brace-less `let ... in` below applies to the single `def mr` after it.
10424let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10425    hasSideEffects = 0 in
10426
10427  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10428            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10429            !strconcat(OpcodeStr#_.Suffix,
10430            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10431            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10432            Sched<[WriteStore]>;
10433}
10434
// Scatter definitions for the type families instantiated below with 64-bit
// elements (avx512vl_f64_info / avx512vl_i64_info). It uses the same vector
// infos and memory operands as avx512_gather_q_pd. All definitions carry
// VEX_W. The 256/128-bit forms require VLX.
10435multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10436                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10437  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10438                                      vy512xmem>, EVEX_V512, VEX_W;
10439  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10440                                      vz512mem>, EVEX_V512, VEX_W;
10441let Predicates = [HasVLX] in {
10442  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10443                              vx256xmem>, EVEX_V256, VEX_W;
10444  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10445                              vy256xmem>, EVEX_V256, VEX_W;
10446  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10447                              vx128xmem>, EVEX_V128, VEX_W;
10448  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10449                              vx128xmem>, EVEX_V128, VEX_W;
10450}
10451}
10452
// Scatter definitions for the type families instantiated below with 32-bit
// elements (avx512vl_f32_info / avx512vl_i32_info). No VEX_W is added.
// The "q" variants use a narrower data vector than the encoded vector length:
// info256 with EVEX_V512, info128 with EVEX_V256. The 128-bit "q" form
// overrides the mask class with VK2WM and uses vx64xmem.
10453multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10454                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10455  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10456                                       EVEX_V512;
10457  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10458                                       EVEX_V512;
10459let Predicates = [HasVLX] in {
10460  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10461                                          vy256xmem>, EVEX_V256;
10462  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10463                                          vy128xmem>, EVEX_V256;
10464  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10465                                          vx128xmem>, EVEX_V128;
10466  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10467                                          vx64xmem, VK2WM>, EVEX_V128;
10468}
10469}
10470
// Floating-point scatters: opcodes 0xA2 ("d" variants) / 0xA3 ("q" variants).
10471defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10472               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10473
// Integer scatters: opcodes 0xA0 ("d" variants) / 0xA1 ("q" variants).
10474defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10475                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10476
10477// prefetch
// One masked gather/scatter prefetch instruction (m form), gated on HasPFI.
// It has no outputs and no selection pattern; its inputs are the mask (KRC)
// and the vector-indexed memory operand (memop). F is the instruction format
// chosen by the instantiation. Both mayLoad and mayStore are set.
10478multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10479                       RegisterClass KRC, X86MemOperand memop> {
10480  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10481  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10482            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10483            EVEX, EVEX_K, Sched<[WriteLoad]>;
10484}
10485
// Prefetch instantiations. Opcode 0xC6 is used for the "D" forms and 0xC7 for
// the "Q" forms. The Format selects the operation: MRM1m = gather pf0,
// MRM2m = gather pf1, MRM5m = scatter pf0, MRM6m = scatter pf1. The PD forms
// add VEX_W. Only the DPS forms use VK16WM; all others use VK8WM.

// Gather prefetch, pf0.
10486defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10487                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10488
10489defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10490                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10491
10492defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10493                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10494
10495defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10496                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10497
// Gather prefetch, pf1.
10498defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10499                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10500
10501defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10502                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10503
10504defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10505                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10506
10507defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10508                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10509
// Scatter prefetch, pf0.
10510defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10511                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10512
10513defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10514                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10515
10516defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10517                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10518
10519defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10520                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10521
// Scatter prefetch, pf1.
10522defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10523                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10524
10525defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10526                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10527
10528defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10529                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10530
10531defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10532                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10533
// Mask-register -> vector conversion for one vector width. The rr instruction
// is selected for a sign-extension of the mask register (Vec.KRC) to Vec.VT.
// The mnemonic is OpcodeStr followed by Vec.Suffix.
// Note: the template parameter named Sched is the SchedWrite passed inside
// the Sched<[...]> list below.
10534multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10535def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10536                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10537                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10538                  EVEX, Sched<[Sched]>;
10539}
10540
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo. The
// 512-bit form requires only prd; the 256/128-bit forms also require VLX.
10541multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10542                                 string OpcodeStr, Predicate prd> {
// Brace-less `let`: applies to the Z definition only.
10543let Predicates = [prd] in
10544  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10545
10546  let Predicates = [prd, HasVLX] in {
10547    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10548    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10549  }
10550}
10551
// Mask -> vector moves. The byte/word forms use opcode 0x28 and require BWI;
// the dword/qword forms use opcode 0x38 and require DQI. The W and Q forms
// add VEX_W.
10552defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10553defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
10554defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10555defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
10556
// Vector -> mask-register conversion for one vector width. The rr instruction
// is selected for X86pcmpgtm with an all-zeros first operand and the source
// vector as the second operand.
10557multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10558    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10559                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10560                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10561                        EVEX, Sched<[WriteMove]>;
10562}
10563
10564// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source register is inserted at _.SubRegIdx into an IMPLICIT_DEF
// of the wide type ExtendInfo.VT. The Name#"Zrr" instruction then runs on
// that wide register, and its result is copied into the narrow type's mask
// class _.KRC.
10565multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10566                                           X86VectorVTInfo _,
10567                                           string Name> {
10568
10569  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10570            (_.KVT (COPY_TO_REGCLASS
10571                     (!cast<Instruction>(Name#"Zrr")
10572                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10573                                      _.RC:$src, _.SubRegIdx)),
10574                   _.KRC))>;
10575}
10576
// Vector -> mask conversion for all widths of VTInfo.
//   - prd alone:     the 512-bit instruction (Z).
//   - prd and VLX:   the native 256/128-bit instructions.
//   - prd and NoVLX: patterns only, which implement the 256/128-bit cases
//                    with the 512-bit instruction (looked up by NAME).
10577multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10578                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10579  let Predicates = [prd] in
10580    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10581                                            EVEX_V512;
10582
10583  let Predicates = [prd, HasVLX] in {
10584    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10585                                              EVEX_V256;
10586    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10587                                               EVEX_V128;
10588  }
10589  let Predicates = [prd, NoVLX] in {
10590    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10591    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10592  }
10593}
10594
// Vector -> mask moves. The byte/word forms use opcode 0x29 and require BWI;
// the dword/qword forms use opcode 0x39 and require DQI. The W and Q forms
// add VEX_W.
10595defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10596                                              avx512vl_i8_info, HasBWI>;
10597defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10598                                              avx512vl_i16_info, HasBWI>, VEX_W;
10599defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10600                                              avx512vl_i32_info, HasDQI>;
10601defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10602                                              avx512vl_i64_info, HasDQI>, VEX_W;
10603
10604// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10605// is available, but BWI is not. We can't handle this in lowering because
10606// a target independent DAG combine likes to combine sext and trunc.
// Each pattern expands the mask to dword elements with VPMOVM2D and then
// truncates to the requested element width with VPMOVDB / VPMOVDW.
10607let Predicates = [HasDQI, NoBWI] in {
10608  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10609            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10610  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10611            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10612}
10613
// Same approach for v8i1 -> v8i16, using the 256-bit instructions (needs VLX).
10614let Predicates = [HasDQI, NoBWI, HasVLX] in {
10615  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10616            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10617}
10618
10619//===----------------------------------------------------------------------===//
10620// AVX-512 - COMPRESS and EXPAND
10621//
10622
// Instruction definitions for one vector width of a compress operation:
//   rr  - register destination, through AVX512_maskable with null_frag (no
//         pattern attached here)
//   mr  - unmasked store to memory
//   mrk - masked store to memory
// None of these carries a selection pattern of its own.
10623multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10624                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10625  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10626              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10627              (null_frag)>, AVX5128IBase,
10628              Sched<[sched]>;
10629
// NOTE(review): this brace-less `let` covers only `def mr`; `def mrk` below
// gets no explicit mayStore/hasSideEffects here. Confirm that is intended.
10630  let mayStore = 1, hasSideEffects = 0 in
10631  def mr : AVX5128I<opc, MRMDestMem, (outs),
10632              (ins _.MemOp:$dst, _.RC:$src),
10633              OpcodeStr # "\t{$src, $dst|$dst, $src}",
10634              []>, EVEX_CD8<_.EltSize, CD8VT1>,
10635              Sched<[sched.Folded]>;
10636
10637  def mrk : AVX5128I<opc, MRMDestMem, (outs),
10638              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10639              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10640              []>,
10641              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10642              Sched<[sched.Folded]>;
10643}
10644
// Selection patterns for the compress instructions of one vector width. Name
// is the instruction-name prefix; _.ZSuffix and the form suffix are appended.
//   X86mCompressingStore                    -> mrk (masked store)
//   X86compress, register pass-through      -> rrk
//   X86compress, all-zeros pass-through     -> rrkz
10645multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10646  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10647            (!cast<Instruction>(Name#_.ZSuffix#mrk)
10648                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10649
10650  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10651            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10652                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10653  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10654            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10655                            _.KRCWM:$mask, _.RC:$src)>;
10656}
10657
// Compress instructions plus their selection patterns for all three vector
// widths of VTInfo. Pred defaults to HasAVX512. The 512-bit form requires
// Pred; the 256/128-bit forms also require VLX.
10658multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10659                                 X86FoldableSchedWrite sched,
10660                                 AVX512VLVectorVTInfo VTInfo,
10661                                 Predicate Pred = HasAVX512> {
10662  let Predicates = [Pred] in
10663  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10664           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10665
10666  let Predicates = [Pred, HasVLX] in {
10667    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10668                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10669    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10670                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10671  }
10672}
10673
// Compress instruction families. The integer forms share opcode 0x8B and the
// FP forms share opcode 0x8A; the 64-bit-element variants are distinguished by
// VEX_W. All four are marked NotMemoryFoldable and use the default predicate
// (HasAVX512) of compress_by_elt_width.
10674// FIXME: Is there a better scheduler class for VPCOMPRESS?
10675defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10676                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10677defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10678                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10679defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10680                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10681defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10682                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
10683
10684// expand
// Instruction records for one vector width of an expand family: a register
// source form (rr) and a memory source form (rm), both built through
// AVX512_maskable. The pattern argument is null_frag, so no selection pattern
// is attached here; the patterns live in expand_by_vec_width_lowering.
10685multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10686                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10687  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10688              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10689              (null_frag)>, AVX5128IBase,
10690              Sched<[sched]>;
10691
  // Memory form: compressed displacement is scaled by one element
  // (EVEX_CD8<_.EltSize, CD8VT1>).
10692  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10693              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10694              (null_frag)>,
10695            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10696            Sched<[sched.Folded, sched.ReadAfterFold]>;
10697}
10698
// Instruction-selection patterns for one vector width of an expand family.
// The instruction is looked up by name as Name # _.ZSuffix # <form>.
//   X86mExpandingLoad(addr, mask, undef)      -> ...rmkz
//   X86mExpandingLoad(addr, mask, all-zeros)  -> ...rmkz
//   X86mExpandingLoad(addr, mask, src0)       -> ...rmk
//   X86expand(src, src0, mask)                -> ...rrk
//   X86expand(src, all-zeros, mask)           -> ...rrkz
10699multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10700
  // An undef passthru is selected to the zero-masking memory form.
10701  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10702            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10703                                        _.KRCWM:$mask, addr:$src)>;
10704
10705  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10706            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10707                                        _.KRCWM:$mask, addr:$src)>;
10708
10709  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10710                                               (_.VT _.RC:$src0))),
10711            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10712                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10713
10714  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10715            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10716                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10717  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10718            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10719                            _.KRCWM:$mask, _.RC:$src)>;
10720}
10721
// Instantiates an expand instruction family for all three vector widths of
// `VTInfo`. Each width combines expand_by_vec_width (the instruction records)
// with expand_by_vec_width_lowering (the selection patterns); NAME is forwarded
// so the patterns can find the instructions by name.
//   Z          : 512-bit, guarded by [Pred]          (Pred defaults to HasAVX512)
//   Z256, Z128 : 256/128-bit, guarded by [Pred, HasVLX]
10722multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10723                               X86FoldableSchedWrite sched,
10724                               AVX512VLVectorVTInfo VTInfo,
10725                               Predicate Pred = HasAVX512> {
10726  let Predicates = [Pred] in
10727  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10728           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10729
10730  let Predicates = [Pred, HasVLX] in {
10731    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10732                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10733    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10734                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10735  }
10736}
10737
// Expand instruction families. The integer forms share opcode 0x89 and the FP
// forms share opcode 0x88; the 64-bit-element variants are distinguished by
// VEX_W. All use the default predicate (HasAVX512) of expand_by_elt_width.
10738// FIXME: Is there a better scheduler class for VPEXPAND?
10739defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10740                                      avx512vl_i32_info>, EVEX;
10741defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10742                                      avx512vl_i64_info>, EVEX, VEX_W;
10743defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10744                                      avx512vl_f32_info>, EVEX;
10745defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10746                                      avx512vl_f64_info>, EVEX, VEX_W;
10747
10748// Handle instruction  reg_vec1 = op(reg_vec,imm)
10749//                                op(mem_vec,imm)
10750//                                op(broadcast(eltVt),imm)
10751// All instructions are created with FROUND_CURRENT.
//
// Unary packed FP operation with an 8-bit immediate. Defines three forms via
// AVX512_maskable_split, which takes two pattern operators (OpNode and
// MaskOpNode; presumably the unmasked and masked variants respectively, as
// AVX512_maskable_split is defined elsewhere):
//   rri  - register source
//   rmi  - full-vector memory source
//   rmbi - broadcast scalar memory source (EVEX_B)
// The mnemonic is OpcodeStr # _.Suffix. All forms read MXCSR and are marked
// mayRaiseFPException.
10752multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10753                                      SDPatternOperator OpNode,
10754                                      SDPatternOperator MaskOpNode,
10755                                      X86FoldableSchedWrite sched,
10756                                      X86VectorVTInfo _> {
10757  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10758  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10759                      (ins _.RC:$src1, i32u8imm:$src2),
10760                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10761                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10762                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10763                      Sched<[sched]>;
10764  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10765                    (ins _.MemOp:$src1, i32u8imm:$src2),
10766                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10767                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10768                            (i32 timm:$src2)),
10769                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10770                                (i32 timm:$src2))>,
10771                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10772  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10773                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10774                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10775                    "${src1}"#_.BroadcastStr#", $src2",
10776                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10777                            (i32 timm:$src2)),
10778                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10779                                (i32 timm:$src2))>, EVEX_B,
10780                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10781  }
10782}
10783
10784// Handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
//
// Register-only {sae} form (rrib) of a unary packed FP operation with an
// immediate. The assembly string carries "{sae}" and the record is tagged
// EVEX_B. Only Uses = [MXCSR] is set here; unlike the non-SAE forms,
// mayRaiseFPException is not set in this multiclass.
10785multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10786                                          SDNode OpNode, X86FoldableSchedWrite sched,
10787                                          X86VectorVTInfo _> {
10788  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10789  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10790                      (ins _.RC:$src1, i32u8imm:$src2),
10791                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10792                      "$src1, {sae}, $src2",
10793                      (OpNode (_.VT _.RC:$src1),
10794                              (i32 timm:$src2))>,
10795                      EVEX_B, Sched<[sched]>;
10796}
10797
// Instantiates a unary packed FP + immediate operation for all vector widths.
//   Z          : 512-bit under [prd]; gets both the regular forms and the
//                {sae} register form (OpNodeSAE).
//   Z128, Z256 : under [prd, HasVLX]; regular forms only, no {sae} form.
10798multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10799            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10800            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10801            Predicate prd>{
10802  let Predicates = [prd] in {
10803    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10804                                           sched.ZMM, _.info512>,
10805                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10806                                               sched.ZMM, _.info512>, EVEX_V512;
10807  }
10808  let Predicates = [prd, HasVLX] in {
10809    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10810                                           sched.XMM, _.info128>, EVEX_V128;
10811    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10812                                           sched.YMM, _.info256>, EVEX_V256;
10813  }
10814}
10815
10816// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10817//                                op(reg_vec2,mem_vec,imm)
10818//                                op(reg_vec2,broadcast(eltVt),imm)
10819// All instructions are created with FROUND_CURRENT.
//
// Binary packed FP operation with an 8-bit immediate. Defines rri (register),
// rmi (full-vector memory) and rmbi (broadcast memory, EVEX_B) forms through
// AVX512_maskable. All forms read MXCSR and are marked mayRaiseFPException.
10820multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10821                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10822  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10823  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10824                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10825                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10826                      (OpNode (_.VT _.RC:$src1),
10827                              (_.VT _.RC:$src2),
10828                              (i32 timm:$src3))>,
10829                      Sched<[sched]>;
10830  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10831                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10832                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10833                    (OpNode (_.VT _.RC:$src1),
10834                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10835                            (i32 timm:$src3))>,
10836                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10837  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10838                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10839                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10840                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10841                    (OpNode (_.VT _.RC:$src1),
10842                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10843                            (i32 timm:$src3))>, EVEX_B,
10844                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10845  }
10846}
10847
10848// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10849//                                op(reg_vec2,mem_vec,imm)
//
// Two-source operation with an i8 immediate where the destination type
// (DestInfo) may differ from the source type (SrcInfo). Defines register (rri)
// and full-vector memory (rmi) forms only; there is no broadcast form here.
// Unlike the FP multiclasses above, no MXCSR use or FP-exception flag is set.
10850multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10851                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10852                              X86VectorVTInfo SrcInfo>{
10853  let ExeDomain = DestInfo.ExeDomain in {
10854  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10855                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10856                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10857                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10858                               (SrcInfo.VT SrcInfo.RC:$src2),
10859                               (i8 timm:$src3)))>,
10860                  Sched<[sched]>;
10861  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10862                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10863                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10864                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10865                             (SrcInfo.VT (bitconvert
10866                                                (SrcInfo.LdFrag addr:$src2))),
10867                             (i8 timm:$src3)))>,
10868                Sched<[sched.Folded, sched.ReadAfterFold]>;
10869  }
10870}
10871
10872// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10873//                                op(reg_vec2,mem_vec,imm)
10874//                                op(reg_vec2,broadcast(eltVt),imm)
//
// Same-type variant of avx512_3Op_rm_imm8: inherits its rri/rmi forms with
// `_` used as both destination and source info, and adds the broadcast memory
// form (rmbi, EVEX_B).
10875multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10876                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10877  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10878
10879  let ExeDomain = _.ExeDomain in
10880  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10881                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10882                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10883                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10884                    (OpNode (_.VT _.RC:$src1),
10885                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10886                            (i8 timm:$src3))>, EVEX_B,
10887                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10888}
10889
10890// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10891//                                       op(reg_vec2,mem_scalar,imm)
//
// Scalar FP two-source operation with an immediate, built through
// AVX512_maskable_scalar. The memory form takes _.IntScalarMemOp and matches
// _.ScalarIntMemFrags. Both forms read MXCSR and are marked
// mayRaiseFPException.
10892multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10893                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10894  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10895  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10896                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10897                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10898                      (OpNode (_.VT _.RC:$src1),
10899                              (_.VT _.RC:$src2),
10900                              (i32 timm:$src3))>,
10901                      Sched<[sched]>;
10902  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10903                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10904                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10905                    (OpNode (_.VT _.RC:$src1),
10906                            (_.ScalarIntMemFrags addr:$src2),
10907                            (i32 timm:$src3))>,
10908                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10909  }
10910}
10911
10912// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
//
// Register-only {sae} form (rrib) of a binary packed FP operation with an
// immediate. Tagged EVEX_B; only Uses = [MXCSR] is set (mayRaiseFPException
// is not set in this multiclass).
10913multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10914                                    SDNode OpNode, X86FoldableSchedWrite sched,
10915                                    X86VectorVTInfo _> {
10916  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10917  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10918                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10919                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10920                      "$src1, $src2, {sae}, $src3",
10921                      (OpNode (_.VT _.RC:$src1),
10922                              (_.VT _.RC:$src2),
10923                              (i32 timm:$src3))>,
10924                      EVEX_B, Sched<[sched]>;
10925}
10926
10927// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
//
// Register-only {sae} form of a scalar FP operation with an immediate, built
// through AVX512_maskable_scalar. Note the record is named NAME#rrib
// explicitly, unlike the packed variant which uses a bare `rrib`.
10928multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10929                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10930  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10931  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10932                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10933                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10934                      "$src1, $src2, {sae}, $src3",
10935                      (OpNode (_.VT _.RC:$src1),
10936                              (_.VT _.RC:$src2),
10937                              (i32 timm:$src3))>,
10938                      EVEX_B, Sched<[sched]>;
10939}
10940
// Instantiates a binary packed FP + immediate operation for all vector widths.
//   Z          : 512-bit under [prd]; regular forms plus the {sae} register
//                form (OpNodeSAE).
//   Z128, Z256 : under [prd, HasVLX]; regular forms only.
10941multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10942            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10943            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10944  let Predicates = [prd] in {
10945    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10946                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10947                                  EVEX_V512;
10948
10949  }
10950  let Predicates = [prd, HasVLX] in {
10951    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10952                                  EVEX_V128;
10953    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10954                                  EVEX_V256;
10955  }
10956}
10957
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, separate
// destination/source type info) for all vector widths. Every width is tagged
// AVX512AIi8Base and EVEX_4V. Pred defaults to HasBWI; the 128/256-bit widths
// additionally require HasVLX.
10958multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10959                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10960                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10961  let Predicates = [Pred] in {
10962    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10963                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10964  }
10965  let Predicates = [Pred, HasVLX] in {
10966    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10967                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10968    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10969                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10970  }
10971}
10972
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) for all
// vector widths. Pred defaults to HasAVX512; the 128/256-bit widths
// additionally require HasVLX. Only the EVEX_V* width tag is applied here, so
// any further encoding classes must come from the instantiation site.
10973multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10974                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10975                                  Predicate Pred = HasAVX512> {
10976  let Predicates = [Pred] in {
10977    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10978                                EVEX_V512;
10979  }
10980  let Predicates = [Pred, HasVLX] in {
10981    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10982                                EVEX_V128;
10983    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10984                                EVEX_V256;
10985  }
10986}
10987
// Instantiates a scalar FP + immediate operation: a single `Z` group under
// [prd] combining the regular forms (avx512_fp_scalar_imm) with the {sae}
// register form (avx512_fp_sae_scalar_imm). Both use the XMM scheduling class.
10988multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10989                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10990                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10991  let Predicates = [prd] in {
10992     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10993              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10994  }
10995}
10996
// Instantiates a unary packed FP + immediate operation for three element
// types:
//   PH : f16, opcode opcPs, predicate fixed to HasFP16 (the `prd` argument is
//        not used for this variant), AVX512PSIi8Base + TA, CD8 element size 16
//   PS : f32, opcode opcPs, predicate prd, AVX512AIi8Base, CD8 element size 32
//   PD : f64, opcode opcPd, predicate prd, AVX512AIi8Base + VEX_W, CD8 size 64
10997multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10998                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10999                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
11000                    X86SchedWriteWidths sched, Predicate prd>{
11001  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
11002                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
11003                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
11004  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
11005                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11006                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
11007  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
11008                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11009                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
11010}
11011
// Packed unary FP + immediate families (PH/PS/PD each).
// Arguments after the mnemonic are: PS opcode, PD opcode, unmasked node,
// masked node, SAE node, scheduling class, predicate for PS/PD.
// VREDUCE requires HasDQI; VRNDSCALE and VGETMANT require HasAVX512.
// VRNDSCALE is the only one with distinct PS/PD opcodes (0x08/0x09) and with a
// different unmasked node (X86any_VRndScale) from its masked node.
11012defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
11013                              X86VReduce, X86VReduce, X86VReduceSAE,
11014                              SchedWriteFRnd, HasDQI>;
11015defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
11016                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
11017                              SchedWriteFRnd, HasAVX512>;
11018defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
11019                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
11020                              SchedWriteFRnd, HasAVX512>;
11021
// Packed VRANGE (binary FP + immediate, with {sae} form at 512 bits).
// Both share opcode 0x50 and require HasDQI; the PD variant adds VEX_W.
11022defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
11023                                                0x50, X86VRange, X86VRangeSAE,
11024                                                SchedWriteFAdd, HasDQI>,
11025      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11026defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11027                                                0x50, X86VRange, X86VRangeSAE,
11028                                                SchedWriteFAdd, HasDQI>,
11029      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11030
// Scalar VRANGE. Both share opcode 0x51, require HasDQI and are VEX_LIG; the
// SD variant adds VEX_W. CD8 scaling is one element (CD8VT1).
11031defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11032      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11033      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11034defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11035      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11036      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11037
// Scalar VREDUCE. All share opcode 0x57 and are VEX_LIG. SD/SS require HasDQI
// and use AVX512AIi8Base; SH requires HasFP16 and uses AVX512PSIi8Base + TA.
11038defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11039      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11040      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11041defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11042      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11043      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11044defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11045      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11046      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11047
// Scalar VGETMANT. All share opcode 0x27 and are VEX_LIG. SD/SS require
// HasAVX512 and use AVX512AIi8Base; SH requires HasFP16 and uses
// AVX512PSIi8Base + TA.
11048defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11049      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11050      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11051defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11052      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11053      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11054defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11055      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11056      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11057
// Instruction records for one vector width of a 128-bit-lane shuffle.
// The pattern matches X86Shuf128 at type CastInfo.VT and bitconverts the
// result to the instruction's own type _.VT, so `_` and `CastInfo` may name
// different element types of the same width.
// Forms: rri (register), rmi (full-vector memory, loaded via CastInfo.LdFrag)
// and rmbi (broadcast memory, EVEX_B).
// EVEX2VEXOvrd is a base name: "rr"/"rm" is appended for the rri/rmi forms'
// EVEX2VEXOverride. The broadcast form gets no override.
11058multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11059                                          X86FoldableSchedWrite sched,
11060                                          X86VectorVTInfo _,
11061                                          X86VectorVTInfo CastInfo,
11062                                          string EVEX2VEXOvrd> {
11063  let ExeDomain = _.ExeDomain in {
11064  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11065                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11066                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11067                  (_.VT (bitconvert
11068                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11069                                                  (i8 timm:$src3)))))>,
11070                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
11071  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11072                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11073                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11074                (_.VT
11075                 (bitconvert
11076                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
11077                                           (CastInfo.LdFrag addr:$src2),
11078                                           (i8 timm:$src3)))))>,
11079                Sched<[sched.Folded, sched.ReadAfterFold]>,
11080                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
11081  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11082                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11083                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11084                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
11085                    (_.VT
11086                     (bitconvert
11087                      (CastInfo.VT
11088                       (X86Shuf128 _.RC:$src1,
11089                                   (_.BroadcastLdFrag addr:$src2),
11090                                   (i8 timm:$src3)))))>, EVEX_B,
11091                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11092  }
11093}
11094
// Instantiates a 128-bit-lane shuffle for the 512-bit (Z, HasAVX512) and
// 256-bit (Z256, HasAVX512 + HasVLX) widths. No 128-bit variant is defined.
// The 512-bit variant is given an empty EVEX2VEX override base name; only the
// 256-bit variant receives the caller-supplied EVEX2VEXOvrd.
11095multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11096                                   AVX512VLVectorVTInfo _,
11097                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11098                                   string EVEX2VEXOvrd>{
11099  let Predicates = [HasAVX512] in
11100  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11101                                          _.info512, CastInfo.info512, "">, EVEX_V512;
11102
11103  let Predicates = [HasAVX512, HasVLX] in
11104  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11105                                             _.info256, CastInfo.info256,
11106                                             EVEX2VEXOvrd>, EVEX_V256;
11107}
11108
// 128-bit-lane shuffles. The FP forms use opcode 0x23 with EVEX2VEX base
// "VPERM2F128"; the integer forms use opcode 0x43 with base "VPERM2I128".
// The 32x4 variants match the shuffle at the 64-bit element type (CastInfo)
// and bitconvert to the 32-bit element type; the 64x2 variants add VEX_W.
// NOTE(review): the integer forms also use WriteFShuffle256 as their
// scheduling class; confirm whether that is intentional.
11109defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11110      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11111defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11112      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11113defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11114      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11115defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11116      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11117
// Instruction records for one vector width of VALIGN: register (rri),
// full-vector memory (rmi) and broadcast memory (rmbi, EVEX_B) forms, all
// matching X86VAlign with an i8 immediate. The rri/rmi forms carry an
// EVEX2VEXOverride naming VPALIGNRrri/VPALIGNRrmi; the broadcast form has none.
11118multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11119                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11120  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11121  // instantiation of this class.
11122  let ExeDomain = _.ExeDomain in {
11123  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11124                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11125                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11126                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11127                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
11128  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11129                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11130                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11131                (_.VT (X86VAlign _.RC:$src1,
11132                                 (bitconvert (_.LdFrag addr:$src2)),
11133                                 (i8 timm:$src3)))>,
11134                Sched<[sched.Folded, sched.ReadAfterFold]>,
11135                EVEX2VEXOverride<"VPALIGNRrmi">;
11136
11137  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11138                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11139                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11140                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
11141                   (X86VAlign _.RC:$src1,
11142                              (_.VT (_.BroadcastLdFrag addr:$src2)),
11143                              (i8 timm:$src3))>, EVEX_B,
11144                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11145  }
11146}
11147
// Instantiates VALIGN (opcode 0x03) for all three vector widths. Z requires
// HasAVX512; Z128/Z256 additionally require HasVLX. The 256-bit variant resets
// EVEX2VEXOverride to unset (`?`), discarding the override that avx512_valign
// attaches to its rri/rmi forms.
11148multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11149                                AVX512VLVectorVTInfo _> {
11150  let Predicates = [HasAVX512] in {
11151    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11152                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
11153  }
11154  let Predicates = [HasAVX512, HasVLX] in {
11155    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11156                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
11157    // We can't really override the 256-bit version so change it back to unset.
11158    let EVEX2VEXOverride = ? in
11159    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11160                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
11161  }
11162}
11163
// VALIGN for 32-bit (D) and 64-bit (Q) elements; the Q variant adds VEX_W.
11164defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11165                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11166defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11167                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11168                                   VEX_W;
11169
// EVEX VPALIGNR on i8 vectors (opcode 0x0F, X86PAlignr). Register and memory
// forms only; uses the default predicate of avx512_common_3Op_rm_imm8 (HasBWI).
11170defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11171                                         SchedWriteShuffle, avx512vl_i8_info,
11172                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11173
11174// Fragments to help convert valignq into masked valignd. Or valignq/valignd
11175// into vpalignr.
// Each transform rescales the target-constant immediate and re-emits it as an
// i8 immediate:
//   ValignqImm32XForm : imm * 2
//   ValignqImm8XForm  : imm * 8
//   ValigndImm8XForm  : imm * 4
// (The factors are presumably the element-size ratios qword->dword,
// qword->byte and dword->byte; no range check is performed on the product.)
11176def ValignqImm32XForm : SDNodeXForm<timm, [{
11177  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11178}]>;
11179def ValignqImm8XForm : SDNodeXForm<timm, [{
11180  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11181}]>;
11182def ValigndImm8XForm : SDNodeXForm<timm, [{
11183  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11184}]>;
11185
// Patterns that select a masked (vselect_mask) use of an align operation
// performed at type From.VT, then bitconverted to To.VT, onto the masked form
// of the instruction named by OpcodeStr. The immediate is rescaled with
// ImmXForm. Here OpcodeStr is an instruction record name prefix (it is
// concatenated with "rrik"/"rrikz"/"rmik"/"rmikz" and resolved via !cast), not
// an assembly mnemonic.
//   reg, passthru src0  -> <OpcodeStr>rrik
//   reg, all-zeros      -> <OpcodeStr>rrikz
//   load, passthru src0 -> <OpcodeStr>rmik
//   load, all-zeros     -> <OpcodeStr>rmikz
11186multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11187                                        X86VectorVTInfo From, X86VectorVTInfo To,
11188                                        SDNodeXForm ImmXForm> {
11189  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11190                                 (bitconvert
11191                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11192                                                   timm:$src3))),
11193                                 To.RC:$src0)),
11194            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11195                                                  To.RC:$src1, To.RC:$src2,
11196                                                  (ImmXForm timm:$src3))>;
11197
11198  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11199                                 (bitconvert
11200                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11201                                                   timm:$src3))),
11202                                 To.ImmAllZerosV)),
11203            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11204                                                   To.RC:$src1, To.RC:$src2,
11205                                                   (ImmXForm timm:$src3))>;
11206
11207  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11208                                 (bitconvert
11209                                  (From.VT (OpNode From.RC:$src1,
11210                                                   (From.LdFrag addr:$src2),
11211                                           timm:$src3))),
11212                                 To.RC:$src0)),
11213            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11214                                                  To.RC:$src1, addr:$src2,
11215                                                  (ImmXForm timm:$src3))>;
11216
11217  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11218                                 (bitconvert
11219                                  (From.VT (OpNode From.RC:$src1,
11220                                                   (From.LdFrag addr:$src2),
11221                                           timm:$src3))),
11222                                 To.ImmAllZerosV)),
11223            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11224                                                   To.RC:$src1, addr:$src2,
11225                                                   (ImmXForm timm:$src3))>;
11226}
11227
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// broadcast load in the To element type (To.BroadcastLdFrag) that has been
// bitcast to the From type. These select the "rmbi", "rmbik" and "rmbikz"
// forms of the instruction named by OpcodeStr, again rescaling the immediate
// through ImmXForm.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked: the node is matched directly in the From type.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  // Masked, masked-off elements taken from $src0.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                           (bitconvert
                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
                                           timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  // Masked, masked-off elements zeroed.
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                           (bitconvert
                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
                                           timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}
11261
// 512-bit: lowering already picks the widest element type it can, so the only
// conversion needed here is valignq -> valignd.
let Predicates = [HasAVX512] in {
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign,
                                         v8i64_info, v16i32_info,
                                         ValignqImm32XForm>;
}
11268
// 128-bit and 256-bit: as for 512-bit, lowering picks the widest element type
// it can, so only valignq -> valignd needs to be handled.
let Predicates = [HasVLX] in {
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign,
                                         v2i64x_info, v4i32x_info,
                                         ValignqImm32XForm>;
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign,
                                         v4i64x_info, v8i32x_info,
                                         ValignqImm32XForm>;
}
11279
// Byte-masked VALIGNQ/VALIGND can be turned into VPALIGNR by scaling the
// immediate to a byte count. Only the 128-bit forms are instantiated here.
let Predicates = [HasVLX, HasBWI] in {
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v2i64x_info, v16i8x_info,
                                      ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v4i32x_info, v16i8x_info,
                                      ValigndImm8XForm>;
}
11287
// VDBPSADBW: i16 result vectors computed from i8 source vectors plus an
// 8-bit immediate.
defm VDBPSADBW : avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                           SchedWritePSADBW,
                                           avx512vl_i16_info,
                                           avx512vl_i8_info>,
                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11291
// Maskable single-source vector operation for one vector type `_`.
//   rr - source in a register.
//   rm - source is a memory operand loaded through _.LdFrag.
// Both forms apply OpNode to the source and use the scheduling class `sched`
// (its Folded variant for the memory form).
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}
11309
// avx512_unary_rm plus an `rmb` form whose source is a scalar memory operand
// loaded through _.BroadcastLdFrag (marked EVEX_B). The assembly operand is
// printed with the type's broadcast suffix (_.BroadcastStr).
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
11321
// Instantiates avx512_unary_rm at 512, 256 and 128 bits. The 512-bit form is
// guarded by `prd` alone; the two narrower forms also require HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                             VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                VTInfo.info128>,
                EVEX_V128;
  }
}
11336
// Instantiates avx512_unary_rmb (register, memory and broadcast forms) at
// 512, 256 and 128 bits. The 512-bit form is guarded by `prd` alone; the two
// narrower forms also require HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                 VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                 VTInfo.info128>,
                EVEX_V128;
  }
}
11351
// Quadword ("q", i64, VEX_W) and doubleword ("d", i32) variants of a unary
// operation, each with register, memory and broadcast forms at all widths.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>,
           VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
11360
// Word ("w", i16) and byte ("b", i8) variants of a unary operation. These
// use avx512_unary_rm_vl, i.e. register and memory forms only, and are
// marked VEX_WIG.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w,
                                 string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>,
           VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>,
           VEX_WIG;
}
11369
// All four element sizes of a unary operation: the d/q variants are guarded
// by HasAVX512, the b/w variants by HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI>;
}
11379
// VPABSB/W/D/Q: vector integer absolute value (selected for `abs`).
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F,
                                    "vpabs", abs, SchedWriteVecALU>;
11382
// VPABS without VLX: place the 128/256-bit i64 operand into an otherwise
// undefined 512-bit register, run the 512-bit VPABSQZrr, and extract the
// matching subregister of the result.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
              (VPABSQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
                                        VR256X:$src, sub_ymm)),
              sub_ymm)>;

  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
              (VPABSQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
                                        VR128X:$src, sub_xmm)),
              sub_xmm)>;
}
11396
// Use 512bit version to implement 128/256 bit.
// When `prd` holds but VLX is unavailable, a 256-bit or 128-bit OpNode is
// selected as the instruction InstrStr#"Zrr": the source is inserted into an
// otherwise undefined 512-bit register and the result is read back from the
// same subregister index.
//   InstrStr - instruction name prefix; "Zrr" is appended.
//   _        - type info supplying the 512/256/128-bit vector types.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit operand.
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    // 128-bit operand.
    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}
11418
// VPLZCNTD/Q: selected for `ctlz`; requires CDI.
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// VPCONFLICTD/Q: selected for X86Conflict; requires CDI.
// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict",
                                        X86Conflict, SchedWriteVecALU,
                                        HasCDI>;

// VPLZCNT: without VLX, implement the 128/256-bit forms with the 512-bit
// instruction.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11429
11430//===---------------------------------------------------------------------===//
11431// Counts number of ones - VPOPCNTD and VPOPCNTQ
11432//===---------------------------------------------------------------------===//
11433
// VPOPCNTD/Q: selected for `ctpop`; requires VPOPCNTDQ.
// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// Without VLX, implement the 128/256-bit forms with the 512-bit instruction.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info,
                             HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info,
                             HasVPOPCNTDQ>;
11440
11441//===---------------------------------------------------------------------===//
11442// Replicate Single FP - MOVSHDUP and MOVSLDUP
11443//===---------------------------------------------------------------------===//
11444
// Unary f32 operation (register and memory forms) at all three vector
// widths, guarded by HasAVX512 and carrying the XS prefix.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                 avx512vl_f32_info, HasAVX512>,
              XS;
}
11450
// VMOVSHDUP / VMOVSLDUP, selected for X86Movshdup / X86Movsldup.
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup",
                                  X86Movshdup, SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup",
                                  X86Movsldup, SchedWriteFShuffle>;
11455
11456//===----------------------------------------------------------------------===//
11457// AVX-512 - MOVDDUP
11458//===----------------------------------------------------------------------===//
11459
// MOVDDUP forms for a single vector type `_` that are expressed as
// broadcasts rather than with X86Movddup:
//   rr - X86VBroadcast of a register source.
//   rm - _.BroadcastLdFrag of a scalar memory operand.
// (avx512_movddup_common uses this for the 128-bit type only.)
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}
11474
// MOVDDUP at all three vector widths. The 512-bit and 256-bit forms are plain
// unary X86Movddup operations; the 128-bit form uses the broadcast-based
// avx512_movddup_128. The 256/128-bit forms require HasAVX512 and HasVLX; no
// explicit predicate is set here for the 512-bit form.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
11487
// MOVDDUP over the f64 vector types, with the XD prefix and VEX_W set.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME : avx512_movddup_common<opc, OpcodeStr, sched,
                                    avx512vl_f64_info>,
              XD, VEX_W;
}
11493
// VMOVDDUP instruction definitions.
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11495
// Broadcasting a scalar f64 held in a register to v2f64: copy the scalar
// into VR128X and use the 128-bit VMOVDDUP register form (plain,
// merge-masked and zero-masked).
let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPZ128rr
              (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

  def : Pat<(vselect_mask (v2i1 VK2WM:$mask),
                          (v2f64 (X86VBroadcast f64:$src)),
                          (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rrk
              VR128X:$src0, VK2WM:$mask,
              (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

  def : Pat<(vselect_mask (v2i1 VK2WM:$mask),
                          (v2f64 (X86VBroadcast f64:$src)),
                          immAllZerosV),
            (VMOVDDUPZ128rrkz
              VK2WM:$mask,
              (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}
11508
11509//===----------------------------------------------------------------------===//
11510// AVX-512 - Unpack Instructions
11511//===----------------------------------------------------------------------===//
11512
// FP unpack high/low. These are built from the FP binop multiclass, so the
// implicit register uses and the FP-exception flag are cleared explicitly.
let Uses = []<Register>, mayRaiseFPException = 0 in {
  defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh,
                                   HasAVX512, SchedWriteFShuffleSizes, 0, 1>;
  defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl,
                                   HasAVX512, SchedWriteFShuffleSizes>;
}
11519
// Integer unpack on byte and word elements; requires BWI.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw",
                                       X86Unpckl, SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw",
                                       X86Unpckh, SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd",
                                       X86Unpckl, SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd",
                                       X86Unpckh, SchedWriteShuffle, HasBWI>;
11528
// Integer unpack on doubleword and quadword elements; requires AVX512.
defm VPUNPCKLDQ  : avx512_binop_rm_vl_d<0x62, "vpunpckldq",
                                        X86Unpckl, SchedWriteShuffle,
                                        HasAVX512>;
defm VPUNPCKHDQ  : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq",
                                        X86Unpckh, SchedWriteShuffle,
                                        HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq",
                                        X86Unpckl, SchedWriteShuffle,
                                        HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq",
                                        X86Unpckh, SchedWriteShuffle,
                                        HasAVX512>;
11537
11538//===----------------------------------------------------------------------===//
11539// AVX-512 - Extract & Insert Integer Instructions
11540//===----------------------------------------------------------------------===//
11541
// Memory-destination form (`mr`) of a byte/word element extract: OpNode's
// result is truncated to the element type of `_` and stored to $dst.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
11551
// Byte element extract (X86pextrb), requires BWI.
//   rr - result written to a GR32orGR64 register.
//   mr - result stored to memory (from avx512_extract_elt_bw_m).
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
11564
// Word element extract (X86pextrw), requires BWI.
//   rr     - opcode 0xC5 (MRMSrcReg, PD), result in a GR32orGR64 register.
//   rr_REV - opcode 0x15 (MRMDestReg, TAPD) register form; it has no
//            selection pattern and is codegen-only, but is force-enabled for
//            disassembly and linked to `rr` through FoldGenData.
//   mr     - opcode 0x15 store form (from avx512_extract_elt_bw_m).
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
11584
// Doubleword/quadword element extract, requires DQI. Unlike the byte/word
// forms these match the generic `extractelt` node with an `imm` index.
//   GRC - general-purpose register class receiving the result in `rr`.
//   rr  - result written to a GRC register.
//   mr  - result stored to memory.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                            RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}
11604
// EVEX-encoded element extracts from 128-bit vectors.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>,
                VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>,
                VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>,
                VEX_W;
11609
// Memory-source form (`rm`) of an element insert: the scalar loaded with
// LdFrag from $src2 is inserted into $src1 at the index given by $src3.
//   immoperator - pattern operator used to match the index ($src3); callers
//                 in this file pass `timm` or `imm`.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag,
                                            SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
11620
// Byte/word element insert, requires BWI.
//   rr - scalar comes from a GR32orGR64 register; index matched as `timm`.
//   rm - scalar loaded with LdFrag (from avx512_insert_elt_m).
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}
11634
// Doubleword/quadword element insert, requires DQI. These match the generic
// `insertelt` node with an `imm` index.
//   GRC - general-purpose register class supplying the scalar in `rr`.
//   rr  - scalar comes from a GRC register.
//   rm  - scalar loaded with _.ScalarLdFrag (from avx512_insert_elt_m).
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}
11649
// EVEX-encoded element inserts into 128-bit vectors.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb,
                                     v16i8x_info, extloadi8>,
                TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw,
                                     v8i16x_info, extloadi16>,
                PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>,
                VEX_W;
11656
// Always select FP16 instructions if available.
// Fallback f16 load / store / bitcast selection through the EVEX
// VPINSRW / VPEXTRW instructions. AddedComplexity = -10 lowers the priority
// of these patterns so that competing patterns are tried first.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  // f16 load: insert the 16-bit memory value into element 0 of an undefined
  // vector.
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0),
              FR16X)>;

  // f16 store: extract element 0 straight to memory. Use the extended
  // FR16X/VR128X classes like the sibling patterns (and like the VR128X
  // operand of the EVEX instruction) so the source is not needlessly
  // restricted to the non-extended register class.
  def : Pat<(store f16:$src, addr:$dst),
            (VPEXTRWZmr addr:$dst,
                        (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0)>;

  // f16 -> i16 bitcast: extract element 0 to a GPR and take its low 16 bits.
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0),
              sub_16bit)>;

  // i16 -> f16 bitcast: widen the GR16 value and insert it at element 0.
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWZrr (v8i16 (IMPLICIT_DEF)),
                          (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                          0),
              FR16X)>;
}
11664
11665//===----------------------------------------------------------------------===//
11666// VSHUFPS - VSHUFPD Operations
11667//===----------------------------------------------------------------------===//
11668
// VSHUFPS/VSHUFPD: three-operand X86Shufp with an 8-bit immediate (opcode
// 0xC6) over the FP vector types in VTInfo_FP.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                     SchedWriteFShuffle>,
              EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
              AVX512AIi8Base, EVEX_4V;
}
11675
// Single-precision (PS) and double-precision (PD, VEX_W) shuffles.
defm VSHUFPS : avx512_shufp<"vshufps", avx512vl_f32_info>,
               PS;
defm VSHUFPD : avx512_shufp<"vshufpd", avx512vl_f64_info>,
               PD, VEX_W;
11678
11679//===----------------------------------------------------------------------===//
11680// AVX-512 - Byte shift Left/Right
11681//===----------------------------------------------------------------------===//
11682
// Shift of a whole vector by an 8-bit immediate for one vector type `_`.
//   ri - source in a register (format MRMr).
//   mi - source loaded from memory through _.LdFrag (format MRMm).
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
           Sched<[sched]>;

  def mi : AVX512<opc, MRMm,
                  (outs _.RC:$dst),
                  (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode
                                (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11699
// Instantiates avx512_shift_packed on the i8 vector types at 512, 256 and
// 128 bits. The 512-bit form is guarded by `prd` alone; the two narrower
// forms also require HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>,
                EVEX_V128;
  }
}
// Byte shifts left (X86vshldq, /7) and right (X86vshrdq, /3); both share
// opcode 0x73 and require BWI.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m,
                                       "vpslldq", SchedWriteShuffle, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m,
                                       "vpsrldq", SchedWriteShuffle, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11719
// Two-source operation whose result type (_dst) differs from its source type
// (_src).
//   rr - both sources in registers; marked commutable.
//   rm - second source loaded from memory through _src.LdFrag.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in {
    def rr : AVX512BI<opc, MRMSrcReg,
                      (outs _dst.RC:$dst),
                      (ins _src.RC:$src1, _src.RC:$src2),
                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dst.RC:$dst,
                            (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                             (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  }

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT
                            (OpNode (_src.VT _src.RC:$src1),
                                    (_src.VT (bitconvert
                                              (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11740
// Instantiates avx512_psadbw_packed with i64 result vectors and i8 source
// vectors at 512, 256 and 128 bits. The 512-bit form is guarded by `prd`
// alone; the two narrower forms also require HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr,
                                    X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>,
                EVEX_V128;
  }
}
11754
// VPSADBW, selected for X86psadbw; requires BWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>,
               EVEX_4V, VEX_WIG;
11757
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// In all of these transforms the 8-bit immediate is handled as a table of
// eight bits. Bits 0 and 7 are never moved (they are always kept by the
// initial mask); the remaining six bits are permuted.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  // 0xa5 keeps bits 0, 2, 5 and 7, which this swap leaves in place.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  // 0xc3 keeps bits 0, 1, 6 and 7, which this swap leaves in place.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  // 0x99 keeps bits 0, 3, 4 and 7, which this swap leaves in place.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
  // (Operands are numbered from 0 here, as in the swap transforms.)
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  // 0x81 keeps bits 0 and 7; every other bit is relocated below.
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
// Rewrites a VPTERNLOG immediate with a three-way rotation of the operands:
// the result is the immediate to use when the operands are supplied in the
// order (operand 2, operand 0, operand 1). Operands are counted from 0.
11806def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11807  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11808  uint8_t Imm = N->getZExtValue();
11809  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  // Bits 0 and 7 (mask 0x81) are the same for every operand order.
11810  uint8_t NewImm = Imm & 0x81;
11811  if (Imm & 0x02) NewImm |= 0x10;
11812  if (Imm & 0x04) NewImm |= 0x02;
11813  if (Imm & 0x08) NewImm |= 0x20;
11814  if (Imm & 0x10) NewImm |= 0x04;
11815  if (Imm & 0x20) NewImm |= 0x40;
11816  if (Imm & 0x40) NewImm |= 0x08;
11817  return getI8Imm(NewImm, SDLoc(N));
11818}]>;
11819
11819
// Defines the register (rri), full-vector memory (rmi) and broadcast memory
// (rmbi) forms of a VPTERNLOG instruction for the vector type `_`, plus extra
// selection patterns for masked nodes whose operands are not in instruction
// order.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node to match; operands are (src1, src2, src3, imm8).
//   sched     - scheduling class (sched.Folded / sched.ReadAfterFold are used
//               for the memory forms).
//   _         - vector type information.
//   Name      - instruction name prefix; the extra patterns look up the
//               generated records as Name#_.ZSuffix#<form>.
//
// In every extra pattern the emitted instruction takes its operands as
// ($src1, mask, $src2, $src3, imm). The immediate transform compensates for
// the node listing $src1/$src2/$src3 in a different order. Immediate bit
// index = (operand0 << 2) | (operand1 << 1) | operand2.
11820multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11821                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11822                          string Name>{
11823  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11824  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11825                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11826                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11827                      (OpNode (_.VT _.RC:$src1),
11828                              (_.VT _.RC:$src2),
11829                              (_.VT _.RC:$src3),
11830                              (i8 timm:$src4)), 1, 1>,
11831                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11832  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11833                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11834                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11835                    (OpNode (_.VT _.RC:$src1),
11836                            (_.VT _.RC:$src2),
11837                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11838                            (i8 timm:$src4)), 1, 0>,
11839                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11840                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11841  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11842                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11843                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11844                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11845                    (OpNode (_.VT _.RC:$src1),
11846                            (_.VT _.RC:$src2),
11847                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11848                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11849                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11850                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11851  }// Constraints = "$src1 = $dst"
11852
11853  // Additional patterns for matching passthru operand in other positions.
11854  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11855                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11856                   _.RC:$src1)),
11857            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11858             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11859  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11860                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11861                   _.RC:$src1)),
11862            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11863             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11864
11865  // Additional patterns for matching zero masking with loads in other
11866  // positions.
11867  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11868                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11869                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11870                   _.ImmAllZerosV)),
11871            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11872             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11873  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11874                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11875                    _.RC:$src2, (i8 timm:$src4)),
11876                   _.ImmAllZerosV)),
11877            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11878             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11879
11880  // Additional patterns for matching masked loads with different
11881  // operand orders.
11882  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11883                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11884                    _.RC:$src2, (i8 timm:$src4)),
11885                   _.RC:$src1)),
11886            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11887             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11888  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11889                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11890                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11891                   _.RC:$src1)),
11892            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11893             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11894  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11895                   (OpNode _.RC:$src2, _.RC:$src1,
11896                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11897                   _.RC:$src1)),
11898            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11899             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Node operands are ($src2, $src3, $src1). The node reads $src1 as index
  // bit 0 while the instruction reads it as index bit 2, so immediate bit 1
  // must move to bit 4 (and 2->1, 4->2): that is the rotation implemented by
  // VPTERNLOG312_imm8, the inverse of the order in which the node is written.
11900  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11901                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11902                    _.RC:$src1, (i8 timm:$src4)),
11903                   _.RC:$src1)),
11904            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11905             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // Node operands are ($src3, $src1, $src2). The node reads $src2 as index
  // bit 0 while the instruction reads it as index bit 1, so immediate bit 1
  // must move to bit 2 (and 2->4, 4->1): that is the rotation implemented by
  // VPTERNLOG231_imm8.
11906  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11907                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11908                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11909                   _.RC:$src1)),
11910            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11911             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11912
11913  // Additional patterns for matching zero masking with broadcasts in other
11914  // positions.
11915  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11916                   (OpNode (_.BroadcastLdFrag addr:$src3),
11917                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11918                   _.ImmAllZerosV)),
11919            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11920             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11921             (VPTERNLOG321_imm8 timm:$src4))>;
11922  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11923                   (OpNode _.RC:$src1,
11924                    (_.BroadcastLdFrag addr:$src3),
11925                    _.RC:$src2, (i8 timm:$src4)),
11926                   _.ImmAllZerosV)),
11927            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11928             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11929             (VPTERNLOG132_imm8 timm:$src4))>;
11930
11931  // Additional patterns for matching masked broadcasts with different
11932  // operand orders.
11933  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11934                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11935                    _.RC:$src2, (i8 timm:$src4)),
11936                   _.RC:$src1)),
11937            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11938             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11939  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11940                   (OpNode (_.BroadcastLdFrag addr:$src3),
11941                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11942                   _.RC:$src1)),
11943            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11944             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11945  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11946                   (OpNode _.RC:$src2, _.RC:$src1,
11947                    (_.BroadcastLdFrag addr:$src3),
11948                    (i8 timm:$src4)), _.RC:$src1)),
11949            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11950             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Node operands are ($src2, $src3, $src1): immediate bit 1 must move to
  // bit 4, which is the rotation implemented by VPTERNLOG312_imm8.
11951  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11952                   (OpNode _.RC:$src2,
11953                    (_.BroadcastLdFrag addr:$src3),
11954                    _.RC:$src1, (i8 timm:$src4)),
11955                   _.RC:$src1)),
11956            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11957             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // Node operands are ($src3, $src1, $src2): immediate bit 1 must move to
  // bit 2, which is the rotation implemented by VPTERNLOG231_imm8.
11958  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11959                   (OpNode (_.BroadcastLdFrag addr:$src3),
11960                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11961                   _.RC:$src1)),
11962            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11963             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11964}
11965
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for the three
// vector widths described by `_`: the 512-bit form requires HasAVX512, the
// 128- and 256-bit forms additionally require HasVLX. NAME is forwarded so
// that avx512_ternlog can look up the instruction records it generates.
11966multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11967                                 AVX512VLVectorVTInfo _> {
11968  let Predicates = [HasAVX512] in
11969    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11970                               _.info512, NAME>, EVEX_V512;
11971  let Predicates = [HasAVX512, HasVLX] in {
11972    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11973                               _.info128, NAME>, EVEX_V128;
11974    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11975                               _.info256, NAME>, EVEX_V256;
11976  }
11977}
11978
// 32-bit and 64-bit element variants of VPTERNLOG. Both use the vector ALU
// scheduling classes; the 64-bit variant additionally sets VEX_W.
11979defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11980                                        avx512vl_i32_info>;
11981defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11982                                        avx512vl_i64_info>, VEX_W;
11983
11984// Patterns to implement vnot using vpternlog instead of creating all ones
11985// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11986// so that the result is only dependent on src0. But we use the same source
11987// for all operands to prevent a false dependency.
// (15 == 0x0f sets exactly the immediate bits 0-3, i.e. the entries selected
// when the src0 bit is 0, so the result is the bitwise NOT of src0.)
11988// TODO: We should maybe have a more generalized algorithm for folding to
11989// vpternlog.
// 512-bit vectors: every element type is handled by the same quadword
// instruction because the operation is purely bitwise.
11990let Predicates = [HasAVX512] in {
11991  def : Pat<(v64i8 (vnot VR512:$src)),
11992            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11993  def : Pat<(v32i16 (vnot VR512:$src)),
11994            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11995  def : Pat<(v16i32 (vnot VR512:$src)),
11996            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11997  def : Pat<(v8i64 (vnot VR512:$src)),
11998            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11999}
12000
// vnot for 128- and 256-bit vectors when VLX is not available. The source is
// inserted into an otherwise undefined 512-bit register (IMPLICIT_DEF), the
// 512-bit VPTERNLOGQZrri is applied with immediate 15, and the original-width
// subregister (sub_xmm / sub_ymm) is extracted from the result.
12001let Predicates = [HasAVX512, NoVLX] in {
12002  def : Pat<(v16i8 (vnot VR128X:$src)),
12003            (EXTRACT_SUBREG
12004             (VPTERNLOGQZrri
12005              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12006              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12007              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12008              (i8 15)), sub_xmm)>;
12009  def : Pat<(v8i16 (vnot VR128X:$src)),
12010            (EXTRACT_SUBREG
12011             (VPTERNLOGQZrri
12012              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12013              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12014              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12015              (i8 15)), sub_xmm)>;
12016  def : Pat<(v4i32 (vnot VR128X:$src)),
12017            (EXTRACT_SUBREG
12018             (VPTERNLOGQZrri
12019              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12020              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12021              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12022              (i8 15)), sub_xmm)>;
12023  def : Pat<(v2i64 (vnot VR128X:$src)),
12024            (EXTRACT_SUBREG
12025             (VPTERNLOGQZrri
12026              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12027              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12028              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12029              (i8 15)), sub_xmm)>;
12030
12031  def : Pat<(v32i8 (vnot VR256X:$src)),
12032            (EXTRACT_SUBREG
12033             (VPTERNLOGQZrri
12034              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12035              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12036              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12037              (i8 15)), sub_ymm)>;
12038  def : Pat<(v16i16 (vnot VR256X:$src)),
12039            (EXTRACT_SUBREG
12040             (VPTERNLOGQZrri
12041              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12042              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12043              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12044              (i8 15)), sub_ymm)>;
12045  def : Pat<(v8i32 (vnot VR256X:$src)),
12046            (EXTRACT_SUBREG
12047             (VPTERNLOGQZrri
12048              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12049              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12050              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12051              (i8 15)), sub_ymm)>;
12052  def : Pat<(v4i64 (vnot VR256X:$src)),
12053            (EXTRACT_SUBREG
12054             (VPTERNLOGQZrri
12055              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12056              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12057              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12058              (i8 15)), sub_ymm)>;
12059}
12060
// vnot for 128- and 256-bit vectors when VLX is available: the native-width
// VPTERNLOGQ register form is used directly, with the source repeated in all
// three operand positions and immediate 15.
12061let Predicates = [HasVLX] in {
12062  def : Pat<(v16i8 (vnot VR128X:$src)),
12063            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12064  def : Pat<(v8i16 (vnot VR128X:$src)),
12065            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12066  def : Pat<(v4i32 (vnot VR128X:$src)),
12067            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12068  def : Pat<(v2i64 (vnot VR128X:$src)),
12069            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12070
12071  def : Pat<(v32i8 (vnot VR256X:$src)),
12072            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12073  def : Pat<(v16i16 (vnot VR256X:$src)),
12074            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12075  def : Pat<(v8i32 (vnot VR256X:$src)),
12076            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12077  def : Pat<(v4i64 (vnot VR256X:$src)),
12078            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12079}
12080
12081//===----------------------------------------------------------------------===//
12082// AVX-512 - FixupImm
12083//===----------------------------------------------------------------------===//
12084
// Packed VFIXUPIMM forms for vector type `_`: register (rri), full-vector
// memory (rmi) and broadcast memory (rmbi). All match X86VFixupimm with
// operands ($src1, $src2, $src3, imm), where $src1 is tied to $dst. TblVT
// supplies the value type (and load fragments) used for $src3, which differs
// from the type of $src1/$src2. Every form reads MXCSR and is marked as
// possibly raising a floating-point exception.
12085multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12086                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12087                                  X86VectorVTInfo TblVT>{
12088  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12089      Uses = [MXCSR], mayRaiseFPException = 1 in {
12090    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12091                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12092                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12093                        (X86VFixupimm (_.VT _.RC:$src1),
12094                                      (_.VT _.RC:$src2),
12095                                      (TblVT.VT _.RC:$src3),
12096                                      (i32 timm:$src4))>, Sched<[sched]>;
12097    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12098                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12099                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12100                      (X86VFixupimm (_.VT _.RC:$src1),
12101                                    (_.VT _.RC:$src2),
12102                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12103                                    (i32 timm:$src4))>,
12104                      Sched<[sched.Folded, sched.ReadAfterFold]>;
12105    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12106                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12107                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12108                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
12109                      (X86VFixupimm (_.VT _.RC:$src1),
12110                                    (_.VT _.RC:$src2),
12111                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12112                                    (i32 timm:$src4))>,
12113                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12114  } // Constraints = "$src1 = $dst"
12115}
12116
// Extends avx512_fixupimm_packed with the {sae} register form (rrib), which
// matches X86VFixupimmSAE and sets EVEX_B. Unlike the inherited forms, this
// one reads MXCSR but does not set mayRaiseFPException.
12117multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12118                                      X86FoldableSchedWrite sched,
12119                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
12120  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12121let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12122  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12123                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12124                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12125                      "$src2, $src3, {sae}, $src4",
12126                      (X86VFixupimmSAE (_.VT _.RC:$src1),
12127                                       (_.VT _.RC:$src2),
12128                                       (TblVT.VT _.RC:$src3),
12129                                       (i32 timm:$src4))>,
12130                      EVEX_B, Sched<[sched]>;
12131  }
12132}
12133
// Scalar VFIXUPIMM forms, all predicated on HasAVX512 with $src1 tied to
// $dst: register (rri, X86VFixupimms), register with {sae} (rrib,
// X86VFixupimmSAEs) and scalar memory (rmi, load wrapped in
// scalar_to_vector). _src3VT supplies the value type, register class and
// scalar load fragment used for $src3.
12134multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12135                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12136                                  X86VectorVTInfo _src3VT> {
12137  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12138      ExeDomain = _.ExeDomain in {
12139    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12140                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12141                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12142                      (X86VFixupimms (_.VT _.RC:$src1),
12143                                     (_.VT _.RC:$src2),
12144                                     (_src3VT.VT _src3VT.RC:$src3),
12145                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // The {sae} form only declares the MXCSR use; it does not carry SIMD_EXC.
    // NOTE(review): this is a register-only form (MRMSrcReg) but it uses the
    // folded-load scheduling classes, unlike rri above and unlike the packed
    // rrib form - confirm whether Sched<[sched]> was intended.
12146    let Uses = [MXCSR] in
12147    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12148                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12149                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12150                      "$src2, $src3, {sae}, $src4",
12151                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
12152                                        (_.VT _.RC:$src2),
12153                                        (_src3VT.VT _src3VT.RC:$src3),
12154                                        (i32 timm:$src4))>,
12155                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12156    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12157                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12158                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12159                     (X86VFixupimms (_.VT _.RC:$src1),
12160                                    (_.VT _.RC:$src2),
12161                                    (_src3VT.VT (scalar_to_vector
12162                                              (_src3VT.ScalarLdFrag addr:$src3))),
12163                                    (i32 timm:$src4))>,
12164                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12165  }
12166}
12167
// Instantiates packed VFIXUPIMM (opcode 0x54) for all three vector widths.
// Only the 512-bit form (HasAVX512) gets the {sae} variant; the 128- and
// 256-bit forms additionally require HasVLX. _Vec describes the data vectors
// and _Tbl the type used for the third operand.
12168multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12169                                      AVX512VLVectorVTInfo _Vec,
12170                                      AVX512VLVectorVTInfo _Tbl> {
12171  let Predicates = [HasAVX512] in
12172    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12173                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12174                                EVEX_4V, EVEX_V512;
12175  let Predicates = [HasAVX512, HasVLX] in {
12176    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12177                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12178                            EVEX_4V, EVEX_V128;
12179    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12180                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12181                            EVEX_4V, EVEX_V256;
12182  }
12183}
12184
// Scalar (opcode 0x55) and packed VFIXUPIMM instantiations. Each
// floating-point type is paired with the integer type of the same element
// width for the third operand; the 64-bit variants additionally set VEX_W.
12185defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12186                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12187                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12188defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12189                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12190                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
12191defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12192                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12193defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12194                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
12195
12196// Patterns used to select SSE scalar fp arithmetic instructions from
12197// either:
12198//
12199// (1) a scalar fp operation followed by a blend
12200//
12201// The effect is that the backend no longer emits unnecessary vector
12202// insert instructions immediately after SSE scalar fp instructions
12203// like addss or mulss.
12204//
12205// For example, given the following code:
12206//   __m128 foo(__m128 A, __m128 B) {
12207//     A[0] += B[0];
12208//     return A;
12209//   }
12210//
12211// Previously we generated:
12212//   addss %xmm0, %xmm1
12213//   movss %xmm1, %xmm0
12214//
12215// We now generate:
12216//   addss %xmm1, %xmm0
12217//
12218// (2) a vector packed single/double fp operation followed by a vector insert
12219//
12220// The effect is that the backend converts the packed fp instruction
12221// followed by a vector insert into a single SSE scalar fp instruction.
12222//
12223// For example, given the following code:
12224//   __m128 foo(__m128 A, __m128 B) {
12225//     __m128 C = A + B;
12226//     return (__m128) {C[0], A[1], A[2], A[3]};
12227//   }
12228//
12229// Previously we generated:
12230//   addps %xmm0, %xmm1
12231//   movss %xmm1, %xmm0
12232//
12233// We now generate:
12234//   addss %xmm1, %xmm0
12235
12236// TODO: Some canonicalization in lowering would simplify the number of
12237// patterns we have to try to match.
// Selection patterns (HasAVX512) that fold "extract element 0, apply a scalar
// binary op, re-insert via MoveNode" into the intrinsic forms of the scalar
// instruction named "V"#OpcPrefix#"Z...".
//   Op        - operator matched in the unmasked patterns.
//   MaskedOp  - operator matched underneath X86selects_mask.
//   OpcPrefix - instruction name stem, e.g. "ADDSS".
//   MoveNode  - node that inserts the scalar result into the destination.
//   _         - vector type information of the 128-bit destination.
//   ZeroFP    - zero constant matched for the zero-masking patterns.
// Each case has a register variant (rr) and a scalar-load variant (rm).
12238multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12239                                          string OpcPrefix, SDNode MoveNode,
12240                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
12241  let Predicates = [HasAVX512] in {
12242    // extracted scalar math op with insert via movss
12243    def : Pat<(MoveNode
12244               (_.VT VR128X:$dst),
12245               (_.VT (scalar_to_vector
12246                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12247                          _.FRC:$src)))),
12248              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12249               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12250    def : Pat<(MoveNode
12251               (_.VT VR128X:$dst),
12252               (_.VT (scalar_to_vector
12253                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12254                          (_.ScalarLdFrag addr:$src))))),
12255              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12256
12257    // extracted masked scalar math op with insert via movss
    // ($src0 is the value selected when the mask bit is clear; it becomes the
    // first operand of the _Intk instruction.)
12258    def : Pat<(MoveNode (_.VT VR128X:$src1),
12259               (scalar_to_vector
12260                (X86selects_mask VK1WM:$mask,
12261                            (MaskedOp (_.EltVT
12262                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12263                                      _.FRC:$src2),
12264                            _.FRC:$src0))),
12265              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12266               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12267               VK1WM:$mask, _.VT:$src1,
12268               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12269    def : Pat<(MoveNode (_.VT VR128X:$src1),
12270               (scalar_to_vector
12271                (X86selects_mask VK1WM:$mask,
12272                            (MaskedOp (_.EltVT
12273                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12274                                      (_.ScalarLdFrag addr:$src2)),
12275                            _.FRC:$src0))),
12276              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12277               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12278               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12279
12280    // extracted zero-masked scalar math op with insert via movss
12281    def : Pat<(MoveNode (_.VT VR128X:$src1),
12282               (scalar_to_vector
12283                (X86selects_mask VK1WM:$mask,
12284                            (MaskedOp (_.EltVT
12285                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12286                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12287      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12288          VK1WM:$mask, _.VT:$src1,
12289          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12290    def : Pat<(MoveNode (_.VT VR128X:$src1),
12291               (scalar_to_vector
12292                (X86selects_mask VK1WM:$mask,
12293                            (MaskedOp (_.EltVT
12294                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12295                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12296      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12297  }
12298}
12299
// Instantiate the scalar math patterns for add/sub/mul/div on f32 (SS), f64
// (SD) and f16 (SH). The unmasked patterns match the any_* operators, the
// masked ones the plain operators.
12300defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12301defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12302defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12303defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12304
12305defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12306defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12307defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12308defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12309
12310defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12311defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12312defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12313defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12314
// Selection pattern (HasAVX512) for a unary scalar op: OpNode applied to
// element 0 of $src and inserted into $dst via Move is selected as the
// "V"#OpcPrefix#"Zr_Int" instruction with operands ($dst, $src).
12315multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12316                                             SDNode Move, X86VectorVTInfo _> {
12317  let Predicates = [HasAVX512] in {
12318    def : Pat<(_.VT (Move _.VT:$dst,
12319                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12320              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12321  }
12322}
12323
// Scalar square root for f32, f64 and f16.
12324defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12325defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12326defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12327
12328//===----------------------------------------------------------------------===//
12329// AES instructions
12330//===----------------------------------------------------------------------===//
12331
// EVEX-encoded AES round instructions built on AESI_binop_rm_int. The 128-
// and 256-bit forms require HasVLX and HasVAES; the 512-bit form requires
// HasAVX512 and HasVAES. IntPrefix is the intrinsic name for the 128-bit
// form; the wider forms append "_256" / "_512" to it.
12332multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12333  let Predicates = [HasVLX, HasVAES] in {
12334    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12335                                  !cast<Intrinsic>(IntPrefix),
12336                                  loadv2i64, 0, VR128X, i128mem>,
12337                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12338    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12339                                  !cast<Intrinsic>(IntPrefix#"_256"),
12340                                  loadv4i64, 0, VR256X, i256mem>,
12341                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12342    }
12343    let Predicates = [HasAVX512, HasVAES] in
12344    defm Z    : AESI_binop_rm_int<Op, OpStr,
12345                                  !cast<Intrinsic>(IntPrefix#"_512"),
12346                                  loadv8i64, 0, VR512, i512mem>,
12347                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
12348}
12349
// Encrypt / decrypt round and last-round instructions (opcodes 0xDC-0xDF).
12350defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12351defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12352defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12353defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12354
12355//===----------------------------------------------------------------------===//
12356// PCLMUL instructions - Carry less multiplication
12357//===----------------------------------------------------------------------===//
12358
// EVEX-encoded VPCLMULQDQ. The 512-bit form requires HasAVX512 and
// HasVPCLMULQDQ; the 128- and 256-bit forms require HasVLX and HasVPCLMULQDQ.
12359let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12360defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12361                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12362
12363let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12364defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12365                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12366
12367defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12368                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12369                                EVEX_CD8<64, CD8VF>, VEX_WIG;
12370}
12371
12372// Aliases
// One vpclmulqdq_aliases instantiation (multiclass defined outside this
// section) per EVEX variant above. The first argument is the instruction-name
// prefix as a string; the register class and memory operand match the
// corresponding defm.
12373defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12374defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12375defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12376
12377//===----------------------------------------------------------------------===//
12378// VBMI2
12379//===----------------------------------------------------------------------===//
12380
// Register (`r`) and full-vector memory (`m`) forms of a three-source
// operation `OpNode` for one vector type `VTI`, both built through
// AVX512_maskable_3src.
//   Op     - opcode byte.
//   OpStr  - mnemonic.
//   sched  - scheduling class; the memory form uses sched.Folded and
//            sched.ReadAfterFold.
// $src1 is tied to $dst, so it is not listed in `ins` here but is the first
// operand of the selection pattern. In the `m` form $src3 is loaded with
// VTI.LdFrag.
12381multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12382                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12383  let Constraints = "$src1 = $dst",
12384      ExeDomain   = VTI.ExeDomain in {
12385    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12386                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12387                "$src3, $src2", "$src2, $src3",
12388                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12389                T8PD, EVEX_4V, Sched<[sched]>;
12390    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12391                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12392                "$src3, $src2", "$src2, $src3",
12393                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12394                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12395                T8PD, EVEX_4V,
12396                Sched<[sched.Folded, sched.ReadAfterFold]>;
12397  }
12398}
12399
// Extends VBMI2_shift_var_rm (which supplies the `r` and `m` forms) with a
// broadcast-memory form `mb`. The `mb` form takes a scalar memory operand
// (VTI.ScalarMemOp), loads it with VTI.BroadcastLdFrag, appends
// VTI.BroadcastStr to the memory operand in both assembly syntaxes, and sets
// EVEX_B. $src1 is tied to $dst as in the base multiclass.
12400multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12401                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12402         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12403  let Constraints = "$src1 = $dst",
12404      ExeDomain   = VTI.ExeDomain in
12405  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12406              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12407              "${src3}"#VTI.BroadcastStr#", $src2",
12408              "$src2, ${src3}"#VTI.BroadcastStr,
12409              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12410               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12411              T8PD, EVEX_4V, EVEX_B,
12412              Sched<[sched.Folded, sched.ReadAfterFold]>;
12413}
12414
// Width fan-out for VBMI2_shift_var_rm (no broadcast form):
//   Z    - VTI.info512, sched.ZMM, EVEX_V512, requires [HasVBMI2]
//   Z256 - VTI.info256, sched.YMM, EVEX_V256, requires [HasVBMI2, HasVLX]
//   Z128 - VTI.info128, sched.XMM, EVEX_V128, requires [HasVBMI2, HasVLX]
12415multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12416                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12417  let Predicates = [HasVBMI2] in
12418  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12419                                   EVEX_V512;
12420  let Predicates = [HasVBMI2, HasVLX] in {
12421    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12422                                   EVEX_V256;
12423    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12424                                   EVEX_V128;
12425  }
12426}
12427
// Width fan-out for VBMI2_shift_var_rmb (register, memory and broadcast
// forms). The predicates, scheduling-width selection and EVEX_V* mixins are
// the same as in VBMI2_shift_var_rm_common: Z under [HasVBMI2], Z256/Z128
// under [HasVBMI2, HasVLX].
12428multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12429                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12430  let Predicates = [HasVBMI2] in
12431  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12432                                    EVEX_V512;
12433  let Predicates = [HasVBMI2, HasVLX] in {
12434    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12435                                    EVEX_V256;
12436    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12437                                    EVEX_V128;
12438  }
12439}
// Element-size fan-out for the variable-count VBMI2 shifts. The mnemonic is
// `Prefix` plus "w", "d" or "q".
//   W - opcode wOp,  i16 elements, rm_common (no broadcast form), VEX_W
//   D - opcode dqOp, i32 elements, rmb_common (adds broadcast form)
//   Q - opcode dqOp, i64 elements, rmb_common, VEX_W
// D and Q share dqOp; of the two, only Q carries VEX_W.
12440multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12441                           SDNode OpNode, X86SchedWriteWidths sched> {
12442  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12443             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12444  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12445             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12446  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12447             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12448}
12449
// Element-size fan-out for the immediate-count VBMI2 shifts. The mnemonic is
// `Prefix` plus "w", "d" or "q"; all three are gated on HasVBMI2.
//   W - opcode wOp, via avx512_common_3Op_rm_imm8, passing avx512vl_i16_info
//       for both of its VT-info arguments.
//   D/Q - opcode dqOp, via avx512_common_3Op_imm8, which adds AVX512AIi8Base
//       and EVEX_4V here.
// The two helper multiclasses take their arguments in different orders
// (opcode first vs. mnemonic first); both are defined outside this section.
12450multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12451                           SDNode OpNode, X86SchedWriteWidths sched> {
12452  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12453             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12454             VEX_W, EVEX_CD8<16, CD8VF>;
12455  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12456             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12457  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12458             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
12459}
12460
12461// Concat & Shift
// VPSHLDV/VPSHRDV are the variable-count forms (VBMI2_shift_var) and
// VPSHLD/VPSHRD the immediate-count forms (VBMI2_shift_imm). The first opcode
// argument is used for the word variant and the second for the dword/qword
// variants. The variable and immediate families are given the same opcode
// byte values (0x70-0x73).
12462defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12463defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12464defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12465defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12466
// Byte and word compress/expand, gated on HasVBMI2 and scheduled as
// WriteVarShuffle256. Compress uses opcode 0x63 and expand 0x62; in each pair
// the word variant adds VEX_W. Only the compress definitions are marked
// NotMemoryFoldable.
12467// Compress
12468defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12469                                         avx512vl_i8_info, HasVBMI2>, EVEX,
12470                                         NotMemoryFoldable;
12471defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12472                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12473                                          NotMemoryFoldable;
12474// Expand
12475defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12476                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12477defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12478                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12479
12480//===----------------------------------------------------------------------===//
12481// VNNI
12482//===----------------------------------------------------------------------===//
12483
// Register (`r`), full-vector memory (`m`) and broadcast-memory (`mb`) forms
// of a VNNI three-source operation for one vector type `VTI`, all built
// through AVX512_maskable_3src with $src1 tied to $dst.
//   Op, OpStr    - opcode byte and mnemonic.
//   OpNode       - SDNode matched by the selection pattern.
//   sched        - scheduling class; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - forwarded (twice) to AVX512_maskable_3src for the `r`
//                  form only; the `m` and `mb` forms do not pass it.
// The memory forms carry EVEX_CD8<32, CD8VF>; `mb` additionally sets EVEX_B
// and appends VTI.BroadcastStr to the memory operand in the asm strings.
12484let Constraints = "$src1 = $dst" in
12485multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12486                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12487                    bit IsCommutable> {
12488  let ExeDomain = VTI.ExeDomain in {
12489  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12490                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12491                                   "$src3, $src2", "$src2, $src3",
12492                                   (VTI.VT (OpNode VTI.RC:$src1,
12493                                            VTI.RC:$src2, VTI.RC:$src3)),
12494                                   IsCommutable, IsCommutable>,
12495                                   EVEX_4V, T8PD, Sched<[sched]>;
12496  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12497                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12498                                   "$src3, $src2", "$src2, $src3",
12499                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12500                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12501                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12502                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12503  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12504                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12505                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12506                                   "$src2, ${src3}"#VTI.BroadcastStr,
12507                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12508                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12509                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12510                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12511  }
12512}
12513
// Width fan-out for VNNI_rmb over 32-bit integer vectors:
//   Z    - v16i32_info, sched.ZMM, EVEX_V512, requires [HasVNNI]
//   Z256 - v8i32x_info, sched.YMM, EVEX_V256, requires [HasVNNI, HasVLX]
//   Z128 - v4i32x_info, sched.XMM, EVEX_V128, requires [HasVNNI, HasVLX]
// IsCommutable is forwarded unchanged to every width.
12514multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12515                       X86SchedWriteWidths sched, bit IsCommutable> {
12516  let Predicates = [HasVNNI] in
12517  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12518                           IsCommutable>, EVEX_V512;
12519  let Predicates = [HasVNNI, HasVLX] in {
12520    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12521                           IsCommutable>, EVEX_V256;
12522    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12523                           IsCommutable>, EVEX_V128;
12524  }
12525}
12526
// The four VNNI dot-product instruction families (opcodes 0x50-0x53). The
// final argument is IsCommutable: 0 for VPDPBUSD/VPDPBUSDS and 1 for
// VPDPWSSD/VPDPWSSDS.
12527// FIXME: Is there a better scheduler class for VPDP?
12528defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12529defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12530defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12531defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12532
12533// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Each pattern selects `add $src1, (X86vpmaddwd_su $src2, $src3)` to the
// VPDPWSSD register (`r`) or memory (`m`) instruction of the matching width,
// with the addend becoming the tied $src1 operand. Only the plain register
// and full-vector load forms are covered here; there are no patterns for the
// broadcast or masked variants in this block.
// NOTE(review): X86vpmaddwd_su is defined outside this section; presumably it
// is the single-use variant of X86vpmaddwd - confirm.
12534let Predicates = [HasVNNI] in {
12535  def : Pat<(v16i32 (add VR512:$src1,
12536                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12537            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12538  def : Pat<(v16i32 (add VR512:$src1,
12539                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12540            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12541}
12542let Predicates = [HasVNNI,HasVLX] in {
12543  def : Pat<(v8i32 (add VR256X:$src1,
12544                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12545            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12546  def : Pat<(v8i32 (add VR256X:$src1,
12547                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12548            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12549  def : Pat<(v4i32 (add VR128X:$src1,
12550                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12551            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12552  def : Pat<(v4i32 (add VR128X:$src1,
12553                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12554            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12555}
12556
12557//===----------------------------------------------------------------------===//
12558// Bit Algorithms
12559//===----------------------------------------------------------------------===//
12560
// Byte and word population count, selected for `ctpop` on i8/i16 vectors and
// gated on HasBITALG. Both use opcode 0x54; the word form additionally
// carries VEX_W. The avx512_unary_lowering instantiations (multiclass defined
// outside this section) refer to the instructions by name string.
12561// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12562defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12563                                   avx512vl_i8_info, HasBITALG>;
12564defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12565                                   avx512vl_i16_info, HasBITALG>, VEX_W;
12566
12567defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12568defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12569
// Single-use variant of X86Vpshufbitqmb: matches the same two-operand node,
// but only when the matched node has exactly one use (N->hasOneUse()).
12570def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12571                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12572  return N->hasOneUse();
12573}]>;
12574
// Register-register (`rr`) and register-memory (`rm`) forms of vpshufbitqmb
// (opcode 0x8F) for one vector type `VTI`. The result is written to a mask
// register (VTI.KRC), not a vector register. Each form hands
// AVX512_maskable_cmp two patterns: one using X86Vpshufbitqmb and one using
// the single-use fragment X86Vpshufbitqmb_su.
// NOTE(review): presumably the _su pattern is the one used for the masked
// variant - confirm against AVX512_maskable_cmp.
12575multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12576  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12577                                (ins VTI.RC:$src1, VTI.RC:$src2),
12578                                "vpshufbitqmb",
12579                                "$src2, $src1", "$src1, $src2",
12580                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12581                                (VTI.VT VTI.RC:$src2)),
12582                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12583                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12584                                Sched<[sched]>;
12585  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12586                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12587                                "vpshufbitqmb",
12588                                "$src2, $src1", "$src1, $src2",
12589                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12590                                (VTI.VT (VTI.LdFrag addr:$src2))),
12591                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12592                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12593                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12594                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12595}
12596
// Width fan-out for VPSHUFBITQMB_rm: Z (info512, sched.ZMM, EVEX_V512) under
// [HasBITALG]; Z256 and Z128 (info256/info128, sched.YMM/sched.XMM) under
// [HasBITALG, HasVLX].
12597multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12598  let Predicates = [HasBITALG] in
12599  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12600  let Predicates = [HasBITALG, HasVLX] in {
12601    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12602    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12603  }
12604}
12605
// Instantiate vpshufbitqmb over byte vectors (avx512vl_i8_info) at all three
// widths, scheduled as SchedWriteVecIMul pending the FIXME below.
12606// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12607defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12608
12609//===----------------------------------------------------------------------===//
12610// GFNI
12611//===----------------------------------------------------------------------===//
12612
// Width fan-out of avx512_binop_rm over byte vectors for a GFNI binary
// operation:
//   Z    - v64i8_info,  sched.ZMM, requires [HasGFNI, HasAVX512, HasBWI]
//   Z256 - v32i8x_info, sched.YMM, requires [HasGFNI, HasVLX, HasBWI]
//   Z128 - v16i8x_info, sched.XMM, requires [HasGFNI, HasVLX, HasBWI]
// NOTE(review): the trailing `1` passed to avx512_binop_rm is presumably its
// IsCommutable flag - confirm against the avx512_binop_rm definition.
12613multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12614                                   X86SchedWriteWidths sched> {
12615  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12616  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12617                                EVEX_V512;
12618  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12619    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12620                                EVEX_V256;
12621    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12622                                EVEX_V128;
12623  }
12624}
12625
// EVEX vgf2p8mulb: opcode 0xCF, selected for X86GF2P8mulb, scheduled as
// SchedWriteVecALU. EVEX_CD8<8, CD8VF> and T8PD are applied to every width.
12626defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12627                                          SchedWriteVecALU>,
12628                                          EVEX_CD8<8, CD8VF>, T8PD;
12629
// Extends avx512_3Op_rm_imm8 (instantiated with VTI for both of its VT-info
// arguments) with a broadcast-memory form `rmbi` that also takes an 8-bit
// immediate.
//   VTI     - vector type of the result and of $src1.
//   BcstVTI - vector type in which the broadcast is expressed. The memory
//             operand is loaded with X86VBroadcastld64 as BcstVTI.VT and then
//             bitconverted to serve as the second operand; the asm strings
//             use BcstVTI.BroadcastStr.
// The `rmbi` form sets EVEX_B and uses the folded-load scheduling classes.
12630multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12631                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12632                                      X86VectorVTInfo BcstVTI>
12633           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12634  let ExeDomain = VTI.ExeDomain in
12635  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12636                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12637                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12638                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12639                (OpNode (VTI.VT VTI.RC:$src1),
12640                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12641                 (i8 timm:$src3))>, EVEX_B,
12642                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12643}
12644
// Width fan-out for GF2P8AFFINE_avx512_rmb_imm. Each width pairs a byte
// vector info (operation type) with the 64-bit-element info of the same total
// width (broadcast type):
//   Z    - v64i8_info  / v8i64_info,  requires [HasGFNI, HasAVX512, HasBWI]
//   Z256 - v32i8x_info / v4i64x_info, requires [HasGFNI, HasVLX, HasBWI]
//   Z128 - v16i8x_info / v2i64x_info, requires [HasGFNI, HasVLX, HasBWI]
12645multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12646                                     X86SchedWriteWidths sched> {
12647  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12648  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12649                                           v64i8_info, v8i64_info>, EVEX_V512;
12650  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12651    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12652                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12653    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12654                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12655  }
12656}
12657
// EVEX GF(2^8) affine transform instructions: vgf2p8affineinvqb (opcode 0xCF)
// and vgf2p8affineqb (opcode 0xCE). Both are scheduled as SchedWriteVecIMul
// and share the same encoding mixins (EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W,
// AVX512AIi8Base).
12658defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12659                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12660                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12661defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12662                         X86GF2P8affineqb, SchedWriteVecIMul>,
12663                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12664
12665
12666//===----------------------------------------------------------------------===//
12667// AVX5124FMAPS
12668//===----------------------------------------------------------------------===//
12669
// The four AVX5124FMAPS instructions. They are defined through
// AVX512_maskable_3src_in_asm with an empty pattern list ([]), so nothing in
// this block selects them from DAG nodes. Because there is no pattern to
// infer properties from, the enclosing `let` states them explicitly: no side
// effects, may load, reads MXCSR, may raise FP exceptions, and $src1 is tied
// to $dst. All four are memory-source forms taking an f128mem operand.
// The packed forms (PS) operate on v16f32 with EVEX_V512 and CD8VQ; the
// scalar forms (SS) use f32x_info/VR128X with VEX_LIG and CD8VF.
12670let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12671    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12672defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12673                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12674                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12675                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12676                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12677
12678defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12679                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12680                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12681                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12682                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12683
12684defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12685                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12686                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12687                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12688                    Sched<[SchedWriteFMA.Scl.Folded]>;
12689
12690defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12691                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12692                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12693                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12694                     Sched<[SchedWriteFMA.Scl.Folded]>;
12695}
12696
12697//===----------------------------------------------------------------------===//
12698// AVX5124VNNIW
12699//===----------------------------------------------------------------------===//
12700
// The two AVX5124VNNIW instructions. Like the AVX5124FMAPS block, they are
// defined through AVX512_maskable_3src_in_asm with an empty pattern list, so
// hasSideEffects/mayLoad and the $src1 = $dst tie are stated explicitly.
// Unlike that block, this one does not list MXCSR in Uses and does not set
// mayRaiseFPException.
// NOTE(review): although ExeDomain is SSEPackedInt, the memory operand is
// f128mem and the scheduling class is SchedWriteFMA, mirroring the FP
// definitions - confirm this is intentional.
12701let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12702    Constraints = "$src1 = $dst" in {
12703defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12704                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12705                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12706                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12707                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12708
12709defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12710                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12711                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12712                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12713                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12714}
12715
// Pattern-less pseudo instructions that store a VK16PAIR operand to memory
// (MASKPAIR16STORE) and load one from memory (MASKPAIR16LOAD). They have no
// side effects beyond the store/load flagged below.
// NOTE(review): presumably used to spill and reload mask-pair registers and
// expanded later - confirm against the code that handles these opcodes.
12716let hasSideEffects = 0 in {
12717  let mayStore = 1, SchedRW = [WriteFStoreX] in
12718  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12719  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12720  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12721}
12722
12723//===----------------------------------------------------------------------===//
12724// VP2INTERSECT
12725//===----------------------------------------------------------------------===//
12726
// Register (`rr`), full-vector memory (`rm`) and broadcast-memory (`rmb`)
// forms of vp2intersect (opcode 0x68) for one vector type `_`. The mnemonic
// is "vp2intersect" followed by _.Suffix. The destination is the mask
// register *pair* operand _.KRPC, set from the X86vp2intersect node.
// These are plain `I` defs rather than AVX512_maskable instantiations, so
// only the unmasked instruction is defined here.
//   rm  - loads $src2 with _.LdFrag (through a bitconvert).
//   rmb - loads $src2 with _.BroadcastLdFrag, appends _.BroadcastStr to the
//         memory operand in the asm string, and sets EVEX_B.
12727multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12728  def rr : I<0x68, MRMSrcReg,
12729                  (outs _.KRPC:$dst),
12730                  (ins _.RC:$src1, _.RC:$src2),
12731                  !strconcat("vp2intersect", _.Suffix,
12732                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12733                  [(set _.KRPC:$dst, (X86vp2intersect
12734                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12735                  EVEX_4V, T8XD, Sched<[sched]>;
12736
12737  def rm : I<0x68, MRMSrcMem,
12738                  (outs _.KRPC:$dst),
12739                  (ins  _.RC:$src1, _.MemOp:$src2),
12740                  !strconcat("vp2intersect", _.Suffix,
12741                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12742                  [(set _.KRPC:$dst, (X86vp2intersect
12743                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12744                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12745                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12746
12747  def rmb : I<0x68, MRMSrcMem,
12748                  (outs _.KRPC:$dst),
12749                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12750                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12751                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12752                  [(set _.KRPC:$dst, (X86vp2intersect
12753                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12754                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12755                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12756}
12757
// Width fan-out for avx512_vp2intersect_modes: Z (info512, sched.ZMM) under
// [HasAVX512, HasVP2INTERSECT]; Z256 and Z128 additionally require HasVLX.
12758multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12759  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12760    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12761
12762  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12763    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12764    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12765  }
12766}
12767
// Dword and qword vp2intersect, both scheduled as SchedWriteVecALU. The
// qword family adds VEX_W.
12768defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12769defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12770
// Width fan-out of avx512_binop_rm2 for a binary operation whose source and
// destination vector types differ.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling classes (ZMM/YMM/XMM).
//   _SrcVTInfo     - source vector info; passed to avx512_binop_rm2 as both
//                    its first and its third VT-info argument.
//   _DstVTInfo     - destination vector info (second VT-info argument).
//   OpNode         - SDNode to select.
//   prd            - feature predicate; Z256/Z128 additionally need HasVLX.
//   IsCommutable   - forwarded to avx512_binop_rm2 (default 0).
// The generated records are named NAME#Z, NAME#Z256 and NAME#Z128.
// EVEX_CD8<32, CD8VF> is hard-coded for every width regardless of the element
// type of the VT infos.
12771multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12772                             X86SchedWriteWidths sched,
12773                             AVX512VLVectorVTInfo _SrcVTInfo,
12774                             AVX512VLVectorVTInfo _DstVTInfo,
12775                             SDNode OpNode, Predicate prd,
12776                             bit IsCommutable = 0> {
12777  let Predicates = [prd] in
12778    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12779                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12780                                   _SrcVTInfo.info512, IsCommutable>,
12781                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12782  let Predicates = [HasVLX, prd] in {
12783    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12784                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12785                                      _SrcVTInfo.info256, IsCommutable>,
12786                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12787    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12788                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12789                                      _SrcVTInfo.info128, IsCommutable>,
12790                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12791  }
12792}
12793
// vcvtne2ps2bf16 (opcode 0x72): takes f32 vector sources (avx512vl_f32_info)
// and is typed with i16 vectors for its destination (avx512vl_i16_info).
// Selected for X86cvtne2ps2bf16, gated on HasBF16, not commutable (0).
// The scheduling class is a stand-in, as the inline FIXME notes.
12794let ExeDomain = SSEPackedSingle in
12795defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12796                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12797                                        avx512vl_f32_info, avx512vl_i16_info,
12798                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12799
12800// Truncate Float to BFloat16
// Instantiates avx512_vcvt_fp at three source widths, each with Uses set to
// the empty register list and mayRaiseFPException = 0:
//   Z    - v16f32 -> v16i16x_info, patterns on X86cvtneps2bf16, [HasBF16]
//   Z256 - v8f32  -> v8i16x_info,  patterns on X86cvtneps2bf16,
//          broadcast string "{1to8}", alias suffix "{y}", [HasBF16, HasVLX]
//   Z128 - v4f32  -> v8i16x_info,  patterns disabled with null_frag,
//          broadcast string "{1to4}", alias suffix "{x}", explicit f128mem
//          operand and VK4WM mask class, [HasBF16, HasVLX]
// Z128 and Z256 both use v8i16x_info for the destination, so the source width
// cannot be inferred from the destination register; the InstAlias records at
// the end accept the mnemonic with an explicit "x" / "y" suffix. All four
// aliases pass 0 as the argument after the result DAG, and the two
// memory-operand aliases pass "intel" as the final InstAlias argument.
12801multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12802                             X86SchedWriteWidths sched> {
12803  let ExeDomain = SSEPackedSingle in {
12804  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12805    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12806                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12807  }
12808  let Predicates = [HasBF16, HasVLX] in {
12809    let Uses = []<Register>, mayRaiseFPException = 0 in {
12810    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12811                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12812                               VK4WM>, EVEX_V128;
12813    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12814                               X86cvtneps2bf16, X86cvtneps2bf16,
12815                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12816    }
12817  } // Predicates = [HasBF16, HasVLX]
12818  } // ExeDomain = SSEPackedSingle
12819
12820  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12821                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12822                  VR128X:$src), 0>;
12823  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12824                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12825                  f128mem:$src), 0, "intel">;
12826  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12827                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12828                  VR256X:$src), 0>;
12829  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12830                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12831                  f256mem:$src), 0, "intel">;
12832}
12833
// vcvtneps2bf16 (opcode 0x72) at all three widths. It is scheduled with
// SchedWriteCvtPD2PS, the same class used for VCVTNE2PS2BF16 above.
12834defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12835                                       SchedWriteCvtPD2PS>, T8XS,
12836                                       EVEX_CD8<32, CD8VF>;
12837
// Hand-written selection patterns for the 128-bit VCVTNEPS2BF16 forms, whose
// built-in patterns were disabled with null_frag in avx512_cvtps2bf16.
// Three source kinds are covered, in order: register, full-vector load
// (loadv4f32) and 32-bit broadcast load (X86VBroadcastld32). Each has three
// patterns:
//   unmasked     - X86cvtneps2bf16                    -> rr / rm / rmb
//   merge-masked - X86mcvtneps2bf16 with $src0        -> rrk / rmk / rmbk
//   zero-masked  - X86mcvtneps2bf16 with ImmAllZerosV -> rrkz / rmkz / rmbkz
// The mask operand is VK4WM in every masked pattern.
12838let Predicates = [HasBF16, HasVLX] in {
12839  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12840  // patterns have been disabled with null_frag.
12841  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12842            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12843  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12844                              VK4WM:$mask),
12845            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12846  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12847                              VK4WM:$mask),
12848            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12849
12850  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12851            (VCVTNEPS2BF16Z128rm addr:$src)>;
12852  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12853                              VK4WM:$mask),
12854            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12855  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12856                              VK4WM:$mask),
12857            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12858
12859  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12860                                     (X86VBroadcastld32 addr:$src)))),
12861            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12862  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12863                              (v8i16 VR128X:$src0), VK4WM:$mask),
12864            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12865  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12866                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12867            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12868}
12869
// Register (`r`), full-vector memory (`m`) and broadcast-memory (`mb`) forms
// of a three-source operation whose accumulator and multiplicand types
// differ, built through AVX512_maskable_3src with $src1 tied to $dst.
//   _     - vector info of the destination / tied accumulator $src1.
//   src_v - vector info of $src2 and $src3 (register class, memory operand,
//           load fragments).
// In the `mb` form the broadcast is loaded as src_v.VT via
// src_v.BroadcastLdFrag, while the asm strings use _.BroadcastStr, i.e. the
// broadcast string of the destination type rather than of src_v.
12870let Constraints = "$src1 = $dst" in {
12871multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12872                              X86FoldableSchedWrite sched,
12873                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12874  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12875                           (ins src_v.RC:$src2, src_v.RC:$src3),
12876                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12877                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12878                           EVEX_4V, Sched<[sched]>;
12879
12880  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12881                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12882                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12883                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12884                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12885                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12886
12887  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12888                  (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12889                  OpcodeStr,
12890                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12891                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12892                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12893                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12894                  EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12895
12896}
12897} // Constraints = "$src1 = $dst"
12898
// Width fan-out for avx512_dpbf16ps_rm. `_` supplies the destination vector
// infos and `src_v` the source vector infos; the same width is taken from
// each. Z requires [prd]; Z256 and Z128 require [HasVLX, prd].
12899multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12900                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12901                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12902  let Predicates = [prd] in {
12903    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12904                                   src_v.info512>, EVEX_V512;
12905  }
12906  let Predicates = [HasVLX, prd] in {
12907    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12908                                   src_v.info256>, EVEX_V256;
12909    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12910                                   src_v.info128>, EVEX_V128;
12911  }
12912}
12913
// vdpbf16ps (opcode 0x52): f32 accumulator/destination (avx512vl_f32_info)
// with sources typed as i32 vectors (avx512vl_i32_info). Selected for
// X86dpbf16ps, scheduled as SchedWriteFMA, gated on HasBF16.
12914let ExeDomain = SSEPackedSingle in
12915defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12916                                       avx512vl_f32_info, avx512vl_i32_info,
12917                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12918
12919//===----------------------------------------------------------------------===//
12920// AVX512FP16
12921//===----------------------------------------------------------------------===//
12922
12923let Predicates = [HasFP16] in {
12924// Move word (r/m16) to packed word
12925def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12926                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12927def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12928                      "vmovw\t{$src, $dst|$dst, $src}",
12929                      [(set VR128X:$dst,
12930                        (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12931                      T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12932
12933def : Pat<(f16 (bitconvert GR16:$src)),
12934          (f16 (COPY_TO_REGCLASS
12935                (VMOVW2SHrr
12936                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12937                FR16X))>;
12938def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12939          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12940def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12941          (VMOVW2SHrr GR32:$src)>;
12942// FIXME: We should really find a way to improve these patterns.
12943def : Pat<(v8i32 (X86vzmovl
12944                  (insert_subvector undef,
12945                                    (v4i32 (scalar_to_vector
12946                                            (and GR32:$src, 0xffff))),
12947                                    (iPTR 0)))),
12948          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12949def : Pat<(v16i32 (X86vzmovl
12950                   (insert_subvector undef,
12951                                     (v4i32 (scalar_to_vector
12952                                             (and GR32:$src, 0xffff))),
12953                                     (iPTR 0)))),
12954          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12955
12956def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12957          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12958
12959// AVX 128-bit movw instruction write zeros in the high 128-bit part.
12960def : Pat<(v8i16 (X86vzload16 addr:$src)),
12961          (VMOVWrm addr:$src)>;
12962def : Pat<(v16i16 (X86vzload16 addr:$src)),
12963          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12964
12965// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12966def : Pat<(v32i16 (X86vzload16 addr:$src)),
12967          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12968
12969def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12970          (VMOVWrm addr:$src)>;
12971def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12972          (VMOVWrm addr:$src)>;
12973def : Pat<(v8i32 (X86vzmovl
12974                  (insert_subvector undef,
12975                                    (v4i32 (scalar_to_vector
12976                                            (i32 (zextloadi16 addr:$src)))),
12977                                    (iPTR 0)))),
12978          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12979def : Pat<(v16i32 (X86vzmovl
12980                   (insert_subvector undef,
12981                                     (v4i32 (scalar_to_vector
12982                                             (i32 (zextloadi16 addr:$src)))),
12983                                     (iPTR 0)))),
12984          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12985
12986// Move word from xmm register to r/m16
12987def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12988                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12989def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12990                       (ins i16mem:$dst, VR128X:$src),
12991                       "vmovw\t{$src, $dst|$dst, $src}",
12992                       [(store (i16 (extractelt (v8i16 VR128X:$src),
12993                                     (iPTR 0))), addr:$dst)]>,
12994                       T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12995
12996def : Pat<(i16 (bitconvert FR16X:$src)),
12997          (i16 (EXTRACT_SUBREG
12998                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12999                sub_16bit))>;
13000def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
13001          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
13002
13003// Allow "vmovw" to use GR64
13004let hasSideEffects = 0 in {
13005  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
13006                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
13007  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
13008                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
13009}
13010}
13011
13012// Convert between packed 16-bit float and packed i16/u16.
// The direction is chosen by the caller through _Dst/_Src: the instantiations
// below use this multiclass both for fp16 -> int (VCVTPH2W, VCVTPH2UW) and for
// int -> fp16 (VCVTW2PH, VCVTUW2PH).
//   OpNode / MaskOpNode - nodes passed to avx512_vcvt_fp for every width.
//   OpNodeRnd           - node passed to avx512_vcvt_fp_rc, which is only
//                         instantiated for the 512-bit form (presumably the
//                         embedded rounding-control variant - confirm against
//                         avx512_vcvt_fp_rc).
// The 512-bit form requires HasFP16; the 128/256-bit forms also need HasVLX.
13013multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13014                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13015                          AVX512VLVectorVTInfo _Dst,
13016                          AVX512VLVectorVTInfo _Src,
13017                          X86SchedWriteWidths sched> {
13018  let Predicates = [HasFP16] in {
13019    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13020                            OpNode, MaskOpNode, sched.ZMM>,
13021             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
13022                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13023  }
13024  let Predicates = [HasFP16, HasVLX] in {
13025    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13026                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13027    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13028                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13029  }
13030}
13031
13032// Convert packed 16-bit float to i16/u16 with truncation.
// Same shape as avx512_cvtph2w, except that the extra 512-bit-only variant is
// built from avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc (presumably the
// suppress-all-exceptions form - confirm against avx512_vcvt_fp_sae).
// OpNodeRnd is the node handed to that variant.
// The 512-bit form requires HasFP16; the 128/256-bit forms also need HasVLX.
13033multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13034                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13035                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
13036                           X86SchedWriteWidths sched> {
13037  let Predicates = [HasFP16] in {
13038    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13039                            OpNode, MaskOpNode, sched.ZMM>,
13040             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
13041                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13042  }
13043  let Predicates = [HasFP16, HasVLX] in {
13044    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13045                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13046    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13047                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13048  }
13049}
13050
// Packed fp16 <-> 16-bit integer conversions. All six share
// EVEX_CD8<16, CD8VF> and SchedWriteCvtPD2DQ; instructions that share an
// opcode byte (0x7D, 0x7C) are told apart by their T_MAP5{PS,PD,XS,XD} class.
// For the fp16 -> int forms the destination info is avx512vl_i16_info and the
// source is avx512vl_f16_info; the int -> fp16 forms swap the two.
13051defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13052                                X86cvtp2UIntRnd, avx512vl_i16_info,
13053                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13054                                T_MAP5PS, EVEX_CD8<16, CD8VF>;
13055defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13056                                X86VUintToFpRnd, avx512vl_f16_info,
13057                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13058                                T_MAP5XD, EVEX_CD8<16, CD8VF>;
13059defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13060                                X86cvttp2si, X86cvttp2siSAE,
13061                                avx512vl_i16_info, avx512vl_f16_info,
13062                                SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
13063defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13064                                X86cvttp2ui, X86cvttp2uiSAE,
13065                                avx512vl_i16_info, avx512vl_f16_info,
13066                                SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
13067defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13068                                X86cvtp2IntRnd, avx512vl_i16_info,
13069                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13070                                T_MAP5PD, EVEX_CD8<16, CD8VF>;
13071defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13072                                X86VSintToFpRnd, avx512vl_f16_info,
13073                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13074                                T_MAP5XS, EVEX_CD8<16, CD8VF>;
13075
13076// Convert Half to Signed/Unsigned Doubleword.
// The source has half as many bits as the destination, so the source type
// infos are one register size smaller than the destination ones
// (v16f16x -> v16i32, v8f16x -> v8i32).
// The 512-bit form additionally instantiates avx512_vcvt_fp_rc with OpNodeRnd.
13077multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13078                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13079                           X86SchedWriteWidths sched> {
13080  let Predicates = [HasFP16] in {
13081    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13082                            MaskOpNode, sched.ZMM>,
13083             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13084                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13085  }
13086  let Predicates = [HasFP16, HasVLX] in {
    // The 128-bit form produces only four i32 results (v4i32x_info) while the
    // source info is still v8f16x_info, so the broadcast string "{1to4}" and
    // the memory operand (f64mem) are given explicitly.
13087    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13088                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13089    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13090                               MaskOpNode, sched.YMM>, EVEX_V256;
13091  }
13092}
13093
13094// Convert Half to Signed/Unsigned Doubleword with truncation.
// Identical in structure to avx512_cvtph2dq, except that the extra
// 512-bit-only variant comes from avx512_vcvt_fp_sae rather than
// avx512_vcvt_fp_rc; OpNodeRnd is the node handed to that variant.
13095multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13096                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13097                            X86SchedWriteWidths sched> {
13098  let Predicates = [HasFP16] in {
13099    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13100                            MaskOpNode, sched.ZMM>,
13101             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13102                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13103  }
13104  let Predicates = [HasFP16, HasVLX] in {
    // Only four results (v4i32x_info) from a v8f16x_info source: the
    // broadcast string "{1to4}" and memory operand f64mem are explicit.
13105    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13106                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13107    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13108                               MaskOpNode, sched.YMM>, EVEX_V256;
13109  }
13110}
13111
13112
// Packed fp16 -> 32-bit integer conversions. All four use
// EVEX_CD8<16, CD8VH> and SchedWriteCvtPS2DQ. VCVTPH2DQ and VCVTTPH2DQ share
// opcode 0x5B and are distinguished by T_MAP5PD vs. T_MAP5XS; the unsigned
// forms use 0x79 (rounding) and 0x78 (truncating), both with T_MAP5PS.
13113defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13114                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13115                                 EVEX_CD8<16, CD8VH>;
13116defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13117                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
13118                                 EVEX_CD8<16, CD8VH>;
13119
13120defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13121                                X86cvttp2si, X86cvttp2siSAE,
13122                                SchedWriteCvtPS2DQ>, T_MAP5XS,
13123                                EVEX_CD8<16, CD8VH>;
13124
13125defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13126                                 X86cvttp2ui, X86cvttp2uiSAE,
13127                                 SchedWriteCvtPS2DQ>, T_MAP5PS,
13128                                 EVEX_CD8<16, CD8VH>;
13129
13130// Convert Half to Signed/Unsigned Quadword.
// Every width uses v8f16x_info as the source info; the destination info
// (v8i64 / v4i64x / v2i64x) determines how many elements are converted.
// The 512-bit form additionally instantiates avx512_vcvt_fp_rc with OpNodeRnd.
13131multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13132                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13133                           X86SchedWriteWidths sched> {
13134  let Predicates = [HasFP16] in {
13135    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13136                            MaskOpNode, sched.ZMM>,
13137             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13138                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13139  }
13140  let Predicates = [HasFP16, HasVLX] in {
13141    // Explicitly specified broadcast string, since we take only 2 elements
13142    // from v8f16x_info source
13143    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13144                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13145                               EVEX_V128;
13146    // Explicitly specified broadcast string, since we take only 4 elements
13147    // from v8f16x_info source
13148    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13149                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13150                               EVEX_V256;
13151  }
13152}
13153
13154// Convert Half to Signed/Unsigned Quadword with truncation.
// Identical in structure to avx512_cvtph2qq, except that the extra
// 512-bit-only variant comes from avx512_vcvt_fp_sae rather than
// avx512_vcvt_fp_rc; OpNodeRnd is the node handed to that variant.
13155multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13156                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13157                            X86SchedWriteWidths sched> {
13158  let Predicates = [HasFP16] in {
13159    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13160                            MaskOpNode, sched.ZMM>,
13161             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13162                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13163  }
13164  let Predicates = [HasFP16, HasVLX] in {
13165    // Explicitly specified broadcast string, since we take only 2 elements
13166    // from v8f16x_info source
13167    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13168                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13169    // Explicitly specified broadcast string, since we take only 4 elements
13170    // from v8f16x_info source
13171    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13172                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13173  }
13174}
13175
// Packed fp16 -> 64-bit integer conversions. All four use T_MAP5PD,
// EVEX_CD8<16, CD8VQ> and SchedWriteCvtPS2DQ, and differ only in opcode:
// 0x7B (signed), 0x79 (unsigned), 0x7A (signed, truncating) and
// 0x78 (unsigned, truncating).
13176defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13177                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13178                                 EVEX_CD8<16, CD8VQ>;
13179
13180defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13181                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13182                                 EVEX_CD8<16, CD8VQ>;
13183
13184defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13185                                 X86cvttp2si, X86cvttp2siSAE,
13186                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
13187                                 EVEX_CD8<16, CD8VQ>;
13188
13189defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13190                                 X86cvttp2ui, X86cvttp2uiSAE,
13191                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
13192                                 EVEX_CD8<16, CD8VQ>;
13193
13194// Convert Signed/Unsigned Quadword to Half.
// Defines the Z/Z128/Z256 instructions and, after them, AT&T-only aliases for
// the "x"/"y"/"z"-suffixed mnemonics. Each alias group covers the register
// forms (rr, rrk, rrkz) and the broadcast-memory forms (rmb, rmbk, rmbkz).
13195multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13196                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13197                           X86SchedWriteWidths sched> {
13198  // We need "x"/"y"/"z" suffixes in order to distinguish between the 128, 256
13199  // and 512 memory forms of these instructions in the asm parser. They have
13200  // the same dest type - 'v8f16x_info'. We also specify the broadcast string
13201  // explicitly for the same reason.
13202  let Predicates = [HasFP16] in {
13203    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13204                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13205             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13206                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13207  }
13208  let Predicates = [HasFP16, HasVLX] in {
    // The 128/256-bit forms are instantiated with null_frag for both nodes,
    // so they get no patterns here; the mask register class (VK2WM / VK4WM)
    // and memory operand are passed explicitly to match the source element
    // count rather than the v8f16x_info destination.
13209    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13210                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13211                               i128mem, VK2WM>,
13212                               EVEX_V128, NotEVEX2VEXConvertible;
13213    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13214                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13215                               i256mem, VK4WM>,
13216                               EVEX_V256, NotEVEX2VEXConvertible;
13217  }
13218
  // "x"-suffixed aliases -> 128-bit source forms (mask class VK2WM,
  // broadcast {1to2}).
13219  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13220                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13221                  VR128X:$src), 0, "att">;
13222  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13223                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13224                  VK2WM:$mask, VR128X:$src), 0, "att">;
13225  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13226                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13227                  VK2WM:$mask, VR128X:$src), 0, "att">;
13228  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13229                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13230                  i64mem:$src), 0, "att">;
13231  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13232                  "$dst {${mask}}, ${src}{1to2}}",
13233                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13234                  VK2WM:$mask, i64mem:$src), 0, "att">;
13235  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13236                  "$dst {${mask}} {z}, ${src}{1to2}}",
13237                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13238                  VK2WM:$mask, i64mem:$src), 0, "att">;
13239
  // "y"-suffixed aliases -> 256-bit source forms (mask class VK4WM,
  // broadcast {1to4}). The destination is still an XMM register.
13240  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13241                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13242                  VR256X:$src), 0, "att">;
13243  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13244                  "$dst {${mask}}, $src}",
13245                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13246                  VK4WM:$mask, VR256X:$src), 0, "att">;
13247  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13248                  "$dst {${mask}} {z}, $src}",
13249                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13250                  VK4WM:$mask, VR256X:$src), 0, "att">;
13251  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13252                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13253                  i64mem:$src), 0, "att">;
13254  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13255                  "$dst {${mask}}, ${src}{1to4}}",
13256                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13257                  VK4WM:$mask, i64mem:$src), 0, "att">;
13258  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13259                  "$dst {${mask}} {z}, ${src}{1to4}}",
13260                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13261                  VK4WM:$mask, i64mem:$src), 0, "att">;
13262
  // "z"-suffixed aliases -> 512-bit source forms (mask class VK8WM,
  // broadcast {1to8}). The destination is still an XMM register.
13263  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13264                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13265                  VR512:$src), 0, "att">;
13266  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13267                  "$dst {${mask}}, $src}",
13268                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13269                  VK8WM:$mask, VR512:$src), 0, "att">;
13270  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13271                  "$dst {${mask}} {z}, $src}",
13272                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13273                  VK8WM:$mask, VR512:$src), 0, "att">;
13274  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13275                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13276                  i64mem:$src), 0, "att">;
13277  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13278                  "$dst {${mask}}, ${src}{1to8}}",
13279                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13280                  VK8WM:$mask, i64mem:$src), 0, "att">;
13281  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13282                  "$dst {${mask}} {z}, ${src}{1to8}}",
13283                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13284                  VK8WM:$mask, i64mem:$src), 0, "att">;
13285}
13286
// Packed 64-bit integer -> fp16 conversions. Both are VEX_W with
// EVEX_CD8<64, CD8VF> and SchedWriteCvtDQ2PS; the signed form is opcode 0x5B
// with T_MAP5PS, the unsigned form opcode 0x7A with T_MAP5XD.
13287defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13288                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
13289                            EVEX_CD8<64, CD8VF>;
13290
13291defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13292                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
13293                            EVEX_CD8<64, CD8VF>;
13294
13295// Convert scalar half to signed/unsigned 32/64-bit integer.
// All eight instantiations are gated on HasFP16 and use T_MAP5XS,
// EVEX_CD8<16, CD8VT1> and WriteCvtSS2I. The 64-bit destination variants
// (i64x_info) add VEX_W and the "{q}" suffix; the 32-bit ones use "{l}".
// First group: conversions built from avx512_cvt_s_int_round.
13296defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13297                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13298                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13299defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13300                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13301                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13302defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13303                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13304                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13305defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13306                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13307                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13308
// Second group: truncating conversions built from avx512_cvt_s_all, which is
// also given the generic any_fp_to_sint / any_fp_to_uint node.
13309defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13310                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13311                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13312defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13313                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13314                        "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13315defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13316                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13317                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13318defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13319                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13320                        "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13321
// Scalar signed/unsigned integer -> half conversions (vcvtsi2sh / vcvtusi2sh)
// together with their assembler aliases and selection patterns. Everything in
// this block requires HasFP16.
13322let Predicates = [HasFP16] in {
  // Instruction definitions. The GR32 forms use i32mem/loadi32, the "l" suffix
  // and EVEX_CD8<32, CD8VT1>; the GR64 forms use i64mem/loadi64, the "q"
  // suffix, VEX_W and EVEX_CD8<64, CD8VT1>. Signed forms are opcode 0x2A,
  // unsigned forms opcode 0x7B, all with T_MAP5XS.
13323  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13324                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13325                                   T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13326  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13327                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13328                                   T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
13329  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13330                                    v8f16x_info, i32mem, loadi32,
13331                                    "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13332  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13333                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13334                                    T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  // AT&T-variant aliases for the suffix-less mnemonics with a memory operand:
  // they resolve to the 32-bit (i32mem) _Int memory forms. The third argument
  // 0 is InstAlias' emit priority, so the aliases are not used when printing.
13335  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13336              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13337
13338  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13339              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13340
13341
  // Plain scalar conversions to f16. The first register operand of the
  // selected instruction is left undefined (IMPLICIT_DEF); only the integer
  // source (load or GPR) comes from the matched DAG.
13342  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13343            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13344  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13345            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13346
13347  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13348            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13349  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13350            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13351
13352  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13353            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13354  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13355            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13356
13357  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13358            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13359  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13360            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13361
13362  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13363  // which produce unnecessary vmovsh instructions.
  // Each one folds X86Movsh(dst, scalar_to_vector(int-to-fp(src))) into the
  // corresponding _Int instruction, with $dst supplying the first register
  // operand. One pattern per {signed, unsigned} x {GR64, GR32} x {reg, load}.
13364  def : Pat<(v8f16 (X86Movsh
13365                     (v8f16 VR128X:$dst),
13366                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13367            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13368
13369  def : Pat<(v8f16 (X86Movsh
13370                     (v8f16 VR128X:$dst),
13371                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13372            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13373
13374  def : Pat<(v8f16 (X86Movsh
13375                     (v8f16 VR128X:$dst),
13376                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13377            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13378
13379  def : Pat<(v8f16 (X86Movsh
13380                     (v8f16 VR128X:$dst),
13381                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13382            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13383
13384  def : Pat<(v8f16 (X86Movsh
13385                     (v8f16 VR128X:$dst),
13386                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13387            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13388
13389  def : Pat<(v8f16 (X86Movsh
13390                     (v8f16 VR128X:$dst),
13391                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13392            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13393
13394  def : Pat<(v8f16 (X86Movsh
13395                     (v8f16 VR128X:$dst),
13396                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13397            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13398
13399  def : Pat<(v8f16 (X86Movsh
13400                     (v8f16 VR128X:$dst),
13401                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13402            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13403} // Predicates = [HasFP16]
13404
13405let Predicates = [HasFP16, HasVLX] in {
13406  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13407  // patterns have been disabled with null_frag.
13408  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13409            (VCVTQQ2PHZ256rr VR256X:$src)>;
13410  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13411                           VK4WM:$mask),
13412            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13413  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13414                           VK4WM:$mask),
13415            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13416
13417  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13418            (VCVTQQ2PHZ256rm addr:$src)>;
13419  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13420                           VK4WM:$mask),
13421            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13422  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13423                           VK4WM:$mask),
13424            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13425
13426  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13427            (VCVTQQ2PHZ256rmb addr:$src)>;
13428  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13429                           (v8f16 VR128X:$src0), VK4WM:$mask),
13430            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13431  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13432                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13433            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13434
13435  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13436            (VCVTQQ2PHZ128rr VR128X:$src)>;
13437  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13438                           VK2WM:$mask),
13439            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13440  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13441                           VK2WM:$mask),
13442            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13443
13444  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13445            (VCVTQQ2PHZ128rm addr:$src)>;
13446  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13447                           VK2WM:$mask),
13448            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13449  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13450                           VK2WM:$mask),
13451            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13452
13453  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13454            (VCVTQQ2PHZ128rmb addr:$src)>;
13455  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13456                           (v8f16 VR128X:$src0), VK2WM:$mask),
13457            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13458  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13459                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13460            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13461
13462  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13463  // patterns have been disabled with null_frag.
// The patterns below cover v4i64 -> v8f16 unsigned conversions for the
// VCVTUQQ2PHZ256 family. The source is a 256-bit value (VR256X / loadv4i64 /
// X86VBroadcastld64) while the pass-through $src0 is a 128-bit VR128X v8f16.
// Unmasked X86any_VUintToFP selects rr/rm/rmb; X86VMUintToFP with $src0
// selects the k forms and with ImmAllZerosV the kz forms (VK4WM mask).

// Register source.
13464  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13465            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13466  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13467                           VK4WM:$mask),
13468            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13469  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13470                           VK4WM:$mask),
13471            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13472
// Source folded from a loadv4i64.
13473  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13474            (VCVTUQQ2PHZ256rm addr:$src)>;
13475  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13476                           VK4WM:$mask),
13477            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13478  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13479                           VK4WM:$mask),
13480            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13481
// Source folded from an X86VBroadcastld64.
13482  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13483            (VCVTUQQ2PHZ256rmb addr:$src)>;
13484  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13485                           (v8f16 VR128X:$src0), VK4WM:$mask),
13486            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13487  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13488                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13489            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13490
// Selection patterns mapping v2i64 -> v8f16 unsigned conversions onto the
// VCVTUQQ2PHZ128 instruction family. For each source kind the unmasked
// X86any_VUintToFP node selects rr/rm/rmb, X86VMUintToFP with a $src0
// pass-through selects the k form, and X86VMUintToFP with ImmAllZerosV selects
// the kz form. All masked forms use a VK2WM mask operand.

// Register source.
13491  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13492            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13493  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13494                           VK2WM:$mask),
13495            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13496  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13497                           VK2WM:$mask),
13498            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13499
// Source folded from a loadv2i64.
13500  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13501            (VCVTUQQ2PHZ128rm addr:$src)>;
13502  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13503                           VK2WM:$mask),
13504            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13505  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13506                           VK2WM:$mask),
13507            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13508
// Source folded from an X86VBroadcastld64.
13509  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13510            (VCVTUQQ2PHZ128rmb addr:$src)>;
13511  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13512                           (v8f16 VR128X:$src0), VK2WM:$mask),
13513            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13514  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13515                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13516            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13517}
13518
// avx512_cfmaop_rm - Three-source maskable forms (via AVX512_maskable_3src)
// for one vector type `_`:
//   r  - $src3 is a register.
//   m  - $src3 is loaded through _.LdFrag.
//   mb - $src3 is a broadcast load through _.BroadcastLdFrag (EVEX_B); the
//        asm strings append _.BroadcastStr to the memory operand.
// In every pattern OpNode receives ($src2, $src3, $src1), i.e. the tied
// accumulator $src1 is its last operand. The surrounding `let` ties $src1 to
// $dst and marks $dst as earlyclobber. IsCommutable is forwarded only to the
// register form.
13519let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13520  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13521    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13522            (ins _.RC:$src2, _.RC:$src3),
13523            OpcodeStr, "$src3, $src2", "$src2, $src3",
13524            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
13525
13526    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13527            (ins _.RC:$src2, _.MemOp:$src3),
13528            OpcodeStr, "$src3, $src2", "$src2, $src3",
13529            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
13530
13531    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13532            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13533            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13534            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
13535  }
13536} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13537
// avx512_cfmaop_round - Register-only three-source form (`rb`) that takes an
// additional AVX512RC:$rc operand. OpNode receives ($src2, $src3, $src1) plus
// the rounding operand as an i32 target immediate. The record is tagged
// EVEX_B and EVEX_RC, and uses the same earlyclobber/tied-operand constraint
// string as the non-rounding forms.
13538multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13539                                 X86VectorVTInfo _> {
13540  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13541  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13542          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13543          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13544          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13545          EVEX_4V, EVEX_B, EVEX_RC;
13546}
13547
13548
// avx512_cfmaop_common - Instantiates the three-source forms for all three
// vector widths:
//   Z    (v16f32_info, EVEX_V512, WriteFMAZ) - requires HasFP16; includes both
//        the r/m/mb forms and the rounding `rb` form (which uses OpNodeRnd).
//   Z256 (v8f32x_info, EVEX_V256, WriteFMAY) and
//   Z128 (v4f32x_info, EVEX_V128, WriteFMAX) - require HasVLX and HasFP16;
//        r/m/mb forms only, no rounding variant.
13549multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13550  let Predicates = [HasFP16] in {
13551    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13552                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13553                      EVEX_V512, Sched<[WriteFMAZ]>;
13554  }
13555  let Predicates = [HasVLX, HasFP16] in {
13556    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13557    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13558  }
13559}
13560
// avx512_cfmulop_common - Two-source counterpart of avx512_cfmaop_common,
// built on avx512_fp_packed / avx512_fp_round_packed (both defined outside
// this section). Each instantiation passes "@earlyclobber $dst" as the
// constraint string. IsCommutable is supplied for two consecutive parameters
// of avx512_fp_packed.
//   Z    (v16f32_info, WriteFMAZ, EVEX_V512) - HasFP16; also gets the rounding
//        form via avx512_fp_round_packed with OpNodeRnd.
//   Z256 (v8f32x_info, WriteFMAY, EVEX_V256) and
//   Z128 (v4f32x_info, WriteFMAX, EVEX_V128) - HasVLX + HasFP16.
// NOTE(review): the meaning of the empty-string argument and the trailing `0`
// is determined by avx512_fp_packed's parameter list - confirm there.
13561multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13562                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13563  let Predicates = [HasFP16] in {
13564    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13565                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13566                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13567                                       "", "@earlyclobber $dst">, EVEX_V512;
13568  }
13569  let Predicates = [HasVLX, HasFP16] in {
13570    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13571                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13572    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13573                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13574  }
13575}
13576
13577
// Packed instruction definitions. All four records list MXCSR in Uses and
// share EVEX_CD8<32, CD8VF>.
//   VFMADDCPH  / VFCMADDCPH : opcode 0x56, three-source (avx512_cfmaop_common).
//   VFMULCPH   / VFCMULCPH  : opcode 0xD6, two-source (avx512_cfmulop_common);
//                             the same node is passed as OpNode and MaskOpNode.
// Each pair shares an opcode and differs by prefix class: T_MAP6XS for the
// vfmaddc/vfmulc mnemonics, T_MAP6XD for the vfcmaddc/vfcmulc mnemonics.
// IsCommutable is 1 for the former and 0 for the latter.
// NOTE(review): presumably the "fc" forms are non-commutable because they
// treat the two multiplicands asymmetrically - confirm against the ISA
// reference.
13578let Uses = [MXCSR] in {
13579  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13580                                    T_MAP6XS, EVEX_CD8<32, CD8VF>;
13581  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13582                                    T_MAP6XD, EVEX_CD8<32, CD8VF>;
13583
13584  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13585                                         x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
13586  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13587                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
13588}
13589
13590
// avx512_cfmaop_sh_common - Scalar ("sh") three-source forms, all on VR128X
// with v4f32x_info and gated on HasFP16. $src1 is tied to $dst and $dst is
// earlyclobber.
//   r  - register $src3; IsCommutable is forwarded here only.
//   m  - $src3 is an ssmem operand matched with sse_load_f32; scheduled as
//        WriteFMAX.Folded + WriteFMAX.ReadAfterFold.
//   rb - register form with an AVX512RC:$rc operand, using OpNodeRnd and
//        tagged EVEX_B, EVEX_RC.
// As in the packed forms, the node operands are ($src2, $src3, $src1).
13591multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13592                                   bit IsCommutable> {
13593  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13594    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13595                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13596                        "$src3, $src2", "$src2, $src3",
13597                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13598                        Sched<[WriteFMAX]>;
13599    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13600                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13601                        "$src3, $src2", "$src2, $src3",
13602                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13603                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13604    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13605                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13606                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13607                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13608                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13609  }
13610}
13611
// avx512_cfmbinop_sh_common - Scalar ("sh") two-source forms built on
// AVX512_maskable with f32x_info, X86selects as the select node, and an
// "@earlyclobber $dst" constraint string. Gated on HasFP16.
//   rr  - both sources in VR128X; IsCommutable is passed for three consecutive
//         parameters.
//   rm  - $src2 is an ssmem operand matched with sse_load_f32; the three
//         commutability-position arguments are 0.
//   rrb - register form with an AVX512RC:$rc operand, using OpNodeRnd and
//         tagged EVEX_B, EVEX_RC; the same three arguments are 0.
// Although the info class is f32x_info, the patterns themselves are typed
// v4f32 on VR128X.
// NOTE(review): the exact meaning of the three flag arguments is defined by
// AVX512_maskable, which is outside this section - confirm there.
13612multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13613                                     SDNode OpNodeRnd, bit IsCommutable> {
13614  let Predicates = [HasFP16] in {
13615    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13616                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13617                        "$src2, $src1", "$src1, $src2",
13618                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13619                        IsCommutable, IsCommutable, IsCommutable,
13620                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13621    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13622                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13623                        "$src2, $src1", "$src1, $src2",
13624                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13625                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13626                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13627    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13628                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13629                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13630                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13631                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13632                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13633  }
13634}
13635
// Scalar instruction definitions. All four records list MXCSR in Uses and
// share EVEX_CD8<32, CD8VT1>, EVEX_V128 and EVEX_4V.
//   VFMADDCSHZ / VFCMADDCSHZ : opcode 0x57, three-source
//                              (avx512_cfmaop_sh_common).
//   VFMULCSHZ  / VFCMULCSHZ  : opcode 0xD7, two-source
//                              (avx512_cfmbinop_sh_common); these two are
//                              additionally tagged VEX_LIG.
// Each pair shares an opcode and differs by prefix class (T_MAP6XS vs
// T_MAP6XD) and by the IsCommutable flag (1 for vfmaddcsh/vfmulcsh, 0 for
// vfcmaddcsh/vfcmulcsh).
13636let Uses = [MXCSR] in {
13637  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13638                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13639  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13640                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13641
13642  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13643                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13644  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13645                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13646}
13647