xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// Group template arguments that can be derived from the vector type (EltNum x
16// EltVT).  These are things like the register class for the writemask, etc.
17// The idea is to pass one of these as the template argument rather than the
18// individual arguments.
19// The template is also used for scalar types, in this case numelts is 1.
//
// Template arguments:
//   numelts - number of elements; 1 selects the scalar handling in VTName.
//   eltvt   - element value type.
//   rc      - register class stored in RC.
//   suffix  - instruction mnemonic suffix; empty by default.
20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                      string suffix = ""> {
  // Direct copies of the template arguments.
22  RegisterClass RC = rc;
23  ValueType EltVT = eltvt;
24  int NumElts = numelts;
25
26  // Corresponding mask register class, looked up by name as "VK" # NumElts.
27  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
28
29  // Corresponding mask register pair class.  Left unset (?) when NumElts is
  // greater than 16; otherwise looked up as "VK" # NumElts # "Pair".
30  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
32
33  // Corresponding write-mask register class ("VK" # NumElts # "WM").
34  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
35
36  // The mask VT: one i1 per element ("v" # NumElts # "i1").
37  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
38
39  // Suffix used in the instruction mnemonic.
40  string Suffix = suffix;
41
42  // VTName is a string name for vector VT. For vector types it will be
43  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44  // It is a little bit complex for scalar types, where NumElts = 1.
45  // In this case we build v4f32 or v2f64
  // More precisely, for NumElts = 1 the element count becomes 4 when
  // EltVT.Size is 32 and 2 when it is 64 (regardless of integer/FP), and
  // stays NumElts for any other element size.
46  string VTName = "v" # !if (!eq (NumElts, 1),
47                        !if (!eq (EltVT.Size, 32), 4,
48                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
49
50  // The vector VT.
51  ValueType VT = !cast<ValueType>(VTName);
52
  // String form of the element type (compared against "f32"/"f64" below).
53  string EltTypeName = !cast<string>(EltVT);
54  // Size of the element type in bits, e.g. 32 for v16i32.
  // Obtained textually by deleting "i" and "f" from EltTypeName.
55  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
56  int EltSize = EltVT.Size;
57
58  // "i" for integer types and "f" for floating-point types
  // (EltTypeName with the size digits removed).
59  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
60
61  // Size of RC in bits, e.g. 512 for VR512.
  // Note this is read from VT.Size rather than from RC itself.
62  int Size = VT.Size;
63
64  // The corresponding memory operand, e.g. i512mem for VR512.
65  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, named EltVT # "mem".
66  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
67  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Unset (?) for any element type other than f32/f64.
68  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
69                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
70
71  // Load patterns
  // All fragments are resolved by name from VTName / EltVT / EltSizeName.
72  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
73
74  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
75
76  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
77  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
78
  // sse_load_f32 / sse_load_f64 for FP element types; unset (?) otherwise.
79  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
80                                           !cast<PatFrags>("sse_load_f32"),
81                               !if (!eq (EltTypeName, "f64"),
82                                     !cast<PatFrags>("sse_load_f64"),
83                               ?));
84
85  // The string to specify embedded broadcast in assembly.
86  string BroadcastStr = "{1to" # NumElts # "}";
87
88  // 8-bit compressed displacement tuple/subvector format.  This is only
89  // defined for NumElts <= 8.
  // The test below is (NumElts >> 4) == 0, i.e. NumElts < 16; that matches
  // "<= 8" for the power-of-two element counts instantiated in this file.
90  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
92
  // Sub-register index for 128-bit (sub_xmm) and 256-bit (sub_ymm) types;
  // unset (?) for every other Size.
93  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94                          !if (!eq (Size, 256), sub_ymm, ?));
95
  // Execution domain: packed single for f32, packed double for f64, packed
  // integer for everything else.
96  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
98                     SSEPackedInt));
99
  // FR32X for f32 elements; FR64X for every other element type (including
  // the integer ones, since only "f32" is tested).
100  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
101
  // All-zeros vector of type VT, used as the false operand of zero-masking
  // selects.
102  dag ImmAllZerosV = (VT immAllZerosV);
103
  // Name suffix by vector size: "Z128", "Z256", or "Z" for any other size.
104  string ZSuffix = !if (!eq (Size, 128), "Z128",
105                   !if (!eq (Size, 256), "Z256", "Z"));
106}
107
// 512-bit vector infos (RC = VR512).  Arguments are
// <NumElts, EltVT, RC, mnemonic suffix>.
108def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
109def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
110def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
111def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
112def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
113def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
114
115// "x" in v32i8x_info means RC = VR256X (and VR128X for the 128-bit records
// further down, which use the same "x" naming).
116def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
117def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
118def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
119def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
120def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
121def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
122
// 128-bit vector infos (RC = VR128X).
123def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
124def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
125def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
126def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
127def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
128def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
129
130// We map scalar types to the smallest (128-bit) vector type
131// with the appropriate element type. This allows the same masking logic to
// be used.  NumElts is 1 here, so X86VectorVTInfo derives VT as a 4-element
// vector for 32-bit elements and a 2-element vector for 64-bit elements.
// The integer records use general-purpose register classes (GR32/GR64) as
// RC; the FP records use VR128X.
132def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
133def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
134def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
135def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
136
// Bundles the three X86VectorVTInfo records (512-, 256- and 128-bit) that
// describe one element type, so that a single template argument can carry
// all vector lengths.
//   i512 / i256 / i128 - the per-width records, stored as info512 / info256 /
//                        info128 respectively.
137class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
138                           X86VectorVTInfo i128> {
139  X86VectorVTInfo info512 = i512;
140  X86VectorVTInfo info256 = i256;
141  X86VectorVTInfo info128 = i128;
142}
143
// One AVX512VLVectorVTInfo per element type, each listing the 512-bit,
// 256-bit and 128-bit X86VectorVTInfo records in that order.
144def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
145                                             v16i8x_info>;
146def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
147                                             v8i16x_info>;
148def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
149                                             v4i32x_info>;
150def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
151                                             v2i64x_info>;
152def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
153                                             v4f32x_info>;
154def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
155                                             v2f64x_info>;
156
// Describes a mask (vNi1) type on its own, without an associated data
// vector type.
//   _krc   - mask register class, stored as KRC.
//   _krcwm - write-mask register class, stored as KRCWM.
//   _vt    - the mask value type, stored as KVT.
157class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
158                       ValueType _vt> {
159  RegisterClass KRC = _krc;
160  RegisterClass KRCWM = _krcwm;
161  ValueType KVT = _vt;
162}
163
// Mask type records for 1 through 64 elements: <VKn, VKnWM, vNi1>.
164def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
165def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
166def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
167def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
168def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
169def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
170def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
171
172// Used for matching masked operations. Ensures the operation part only has a
173// single use.
// Wraps vselect with a C++ predicate that defers to
// isProfitableToFormMaskedOp(N).  That helper is defined outside this file;
// NOTE(review): the single-use guarantee stated above is presumably enforced
// there - confirm against its implementation.
174def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
175                           (vselect node:$mask, node:$src1, node:$src2), [{
176  return isProfitableToFormMaskedOp(N);
177}]>;
178
// Counterpart of vselect_mask for the X86selects node: same operand list
// and the same isProfitableToFormMaskedOp(N) predicate.  It is the Select
// operator passed by the *_scalar maskable multiclasses in this file.
179def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
180                              (X86selects node:$mask, node:$src1, node:$src2), [{
181  return isProfitableToFormMaskedOp(N);
182}]>;
183
184// This multiclass generates the masking variants from the non-masking
185// variant.  It only provides the assembly pieces for the masking variants.
186// It assumes custom ISel patterns for masking which can be provided as
187// template arguments.
//
// Three records are defined: NAME (unmasked), NAME#k (merge-masking) and
// NAME#kz (zero-masking).
//
// Template arguments:
//   O, F               - opcode byte and instruction format, forwarded to
//                        AVX512<>.
//   Outs               - output operand dag shared by all three variants.
//   Ins / MaskingIns / ZeroMaskingIns
//                      - input operand dags for the unmasked, masked and
//                        zero-masked variants respectively.
//   OpcodeStr          - mnemonic.
//   AttSrcAsm / IntelSrcAsm
//                      - source operand strings for the AT&T and Intel
//                        halves of the "{att|intel}" asm string.
//   Pattern / MaskingPattern / ZeroMaskingPattern
//                      - ISel pattern lists for the three variants.
//   MaskingConstraint  - Constraints string for the NAME#k variant only.
//   IsCommutable / IsKCommutable / IsKZCommutable
//                      - isCommutable for NAME / NAME#k / NAME#kz.
//                        IsKZCommutable defaults to IsCommutable.
188multiclass AVX512_maskable_custom<bits<8> O, Format F,
189                                  dag Outs,
190                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
191                                  string OpcodeStr,
192                                  string AttSrcAsm, string IntelSrcAsm,
193                                  list<dag> Pattern,
194                                  list<dag> MaskingPattern,
195                                  list<dag> ZeroMaskingPattern,
196                                  string MaskingConstraint = "",
197                                  bit IsCommutable = 0,
198                                  bit IsKCommutable = 0,
199                                  bit IsKZCommutable = IsCommutable> {
  // Unmasked form: "<op>\t{<att srcs>, $dst|$dst, <intel srcs>}".
200  let isCommutable = IsCommutable in
201    def NAME: AVX512<O, F, Outs, Ins,
202                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
203                                     "$dst, "#IntelSrcAsm#"}",
204                       Pattern>;
205
206  // Prefer over VMOV*rrk Pat<>
  // Merge-masking form: $dst is printed with a trailing {${mask}}.
207  let isCommutable = IsKCommutable in
208    def NAME#k: AVX512<O, F, Outs, MaskingIns,
209                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
210                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
211                       MaskingPattern>,
212              EVEX_K {
213      // In case of the 3src subclass this is overridden with a let.
214      string Constraints = MaskingConstraint;
215    }
216
217  // Zero mask does not add any restrictions to commute operands transformation.
218  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  // (IsKZCommutable defaults to IsCommutable; callers may override it.)
  // Zero-masking form: $dst is printed with {${mask}} {z}.
219  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
220    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
221                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
222                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
223                       ZeroMaskingPattern>,
224              EVEX_KZ;
225}
226
227
228// Common base class of AVX512_maskable and AVX512_maskable_3src.
//
// Builds the three pattern lists for AVX512_maskable_custom from dags:
//   unmasked    : (set _.RC:$dst, RHS)
//   masked      : (set _.RC:$dst, MaskingRHS)
//   zero-masked : (set _.RC:$dst, (Select mask, RHS, all-zeros of _.VT))
// Note that the zero-masked pattern is derived from RHS, not MaskingRHS.
//
// Template arguments beyond those forwarded unchanged:
//   _          - X86VectorVTInfo supplying RC, KRCWM and ImmAllZerosV.
//   RHS        - result dag of the unmasked operation.
//   MaskingRHS - complete result dag of the merge-masking operation
//                (the caller supplies the select).
//   Select     - select operator used for the zero-masking pattern;
//                defaults to vselect_mask.
229multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
230                                  dag Outs,
231                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
232                                  string OpcodeStr,
233                                  string AttSrcAsm, string IntelSrcAsm,
234                                  dag RHS, dag MaskingRHS,
235                                  SDPatternOperator Select = vselect_mask,
236                                  string MaskingConstraint = "",
237                                  bit IsCommutable = 0,
238                                  bit IsKCommutable = 0,
239                                  bit IsKZCommutable = IsCommutable> :
240  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
241                         AttSrcAsm, IntelSrcAsm,
242                         [(set _.RC:$dst, RHS)],
243                         [(set _.RC:$dst, MaskingRHS)],
244                         [(set _.RC:$dst,
245                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
246                         MaskingConstraint, IsCommutable,
247                         IsKCommutable, IsKZCommutable>;
248
249// This multiclass generates the unconditional/non-masking, the masking and
250// the zero-masking variant of the vector instruction.  In the masking case, the
251// preserved vector elements come from a new dummy input operand tied to $dst.
252// This version uses a separate dag for non-masking and masking.
//
//   RHS     - result dag for the unmasked pattern.
//   MaskRHS - result dag wrapped in vselect_mask for both the merge-masking
//             pattern (false operand $src0) and the zero-masking pattern
//             (false operand all-zeros).
// The masked input list is (ins $src0, $mask) followed by Ins; the
// zero-masked input list is (ins $mask) followed by Ins.  The masked variant
// is given the constraint "$src0 = $dst".
253multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
254                           dag Outs, dag Ins, string OpcodeStr,
255                           string AttSrcAsm, string IntelSrcAsm,
256                           dag RHS, dag MaskRHS,
257                           bit IsCommutable = 0, bit IsKCommutable = 0,
258                           bit IsKZCommutable = IsCommutable> :
259   AVX512_maskable_custom<O, F, Outs, Ins,
260                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
261                          !con((ins _.KRCWM:$mask), Ins),
262                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
263                          [(set _.RC:$dst, RHS)],
264                          [(set _.RC:$dst,
265                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
266                          [(set _.RC:$dst,
267                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
268                          "$src0 = $dst", IsCommutable, IsKCommutable,
269                          IsKZCommutable>;
270
271// This multiclass generates the unconditional/non-masking, the masking and
272// the zero-masking variant of the vector instruction.  In the masking case, the
273// preserved vector elements come from a new dummy input operand tied to $dst.
//
// The same RHS dag is used for all three patterns.  The merge-masking
// pattern is (Select $mask, RHS, $src0), with $src0 prepended to the input
// list and tied to $dst via the "$src0 = $dst" constraint.
//   Select - select operator for both masked patterns; defaults to
//            vselect_mask.  It is the last template argument, so callers
//            overriding it must also spell out the three commutable bits.
274multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
275                           dag Outs, dag Ins, string OpcodeStr,
276                           string AttSrcAsm, string IntelSrcAsm,
277                           dag RHS,
278                           bit IsCommutable = 0, bit IsKCommutable = 0,
279                           bit IsKZCommutable = IsCommutable,
280                           SDPatternOperator Select = vselect_mask> :
281   AVX512_maskable_common<O, F, _, Outs, Ins,
282                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
283                          !con((ins _.KRCWM:$mask), Ins),
284                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
285                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
286                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
287                          IsKZCommutable>;
288
289// This multiclass generates the unconditional/non-masking, the masking and
290// the zero-masking variant of the scalar instruction.
// It is AVX512_maskable with X86selects_mask as the select operator and all
// three commutable bits (IsCommutable, IsKCommutable, IsKZCommutable) fixed
// to 0.
291multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
292                           dag Outs, dag Ins, string OpcodeStr,
293                           string AttSrcAsm, string IntelSrcAsm,
294                           dag RHS> :
295   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
296                   RHS, 0, 0, 0, X86selects_mask>;
297
298// Similar to AVX512_maskable but in this case one of the source operands
299// ($src1) is already tied to $dst so we just use that for the preserved
300// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
301// $src1.
//
// $src1 is prepended to NonTiedIns for all three variants; the masked and
// zero-masked variants additionally take $mask right after $src1.
//   MaskOnly - when set, (null_frag) replaces RHS in the argument handed to
//              AVX512_maskable_common.  That argument feeds both the
//              unmasked and the zero-masking pattern there, while the
//              merge-masking pattern below still uses the real RHS.
//              NOTE(review): presumably this leaves only the merge-masking
//              pattern selectable - confirm how null_frag patterns are
//              treated.
// The masking constraint is passed as "" (nothing is tied here), and
// IsKZCommutable is not passed, so it takes its default of IsCommutable.
302multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
303                                dag Outs, dag NonTiedIns, string OpcodeStr,
304                                string AttSrcAsm, string IntelSrcAsm,
305                                dag RHS,
306                                bit IsCommutable = 0,
307                                bit IsKCommutable = 0,
308                                SDPatternOperator Select = vselect_mask,
309                                bit MaskOnly = 0> :
310   AVX512_maskable_common<O, F, _, Outs,
311                          !con((ins _.RC:$src1), NonTiedIns),
312                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
313                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
314                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
315                          !if(MaskOnly, (null_frag), RHS),
316                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
317                          Select, "", IsCommutable, IsKCommutable>;
318
319// Similar to AVX512_maskable_3src but in this case the input VT for the tied
320// operand differs from the output VT. This requires a bitconvert on
321// the preserved vector going into the vselect.
322// NOTE: The unmasked pattern is disabled.
//
//   OutVT - type info used for the result ($dst register class, zero vector).
//   InVT  - type info used for the tied $src1 operand and for the mask
//           register class (InVT.KRCWM).
// (null_frag) is passed as RHS to AVX512_maskable_common, which uses RHS for
// both the unmasked and the zero-masking pattern.
// NOTE(review): the zero-masking pattern therefore also contains null_frag
// and is presumably disabled as well - confirm.
// Only IsCommutable is forwarded; IsKCommutable keeps its default of 0.
323multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
324                                     X86VectorVTInfo InVT,
325                                     dag Outs, dag NonTiedIns, string OpcodeStr,
326                                     string AttSrcAsm, string IntelSrcAsm,
327                                     dag RHS, bit IsCommutable = 0> :
328   AVX512_maskable_common<O, F, OutVT, Outs,
329                          !con((ins InVT.RC:$src1), NonTiedIns),
330                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
331                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
332                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
333                          (vselect_mask InVT.KRCWM:$mask, RHS,
334                           (bitconvert InVT.RC:$src1)),
335                           vselect_mask, "", IsCommutable>;
336
// Scalar flavour of AVX512_maskable_3src: identical arguments, but the
// select operator is fixed to X86selects_mask.  MaskOnly is forwarded
// unchanged.
337multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
338                                     dag Outs, dag NonTiedIns, string OpcodeStr,
339                                     string AttSrcAsm, string IntelSrcAsm,
340                                     dag RHS,
341                                     bit IsCommutable = 0,
342                                     bit IsKCommutable = 0,
343                                     bit MaskOnly = 0> :
344   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
345                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
346                        X86selects_mask, MaskOnly>;
347
// Defines the unmasked, masked and zero-masked records with ISel patterns
// only on the unmasked one: Pattern is used for NAME, while the masked and
// zero-masked pattern lists are empty.  The masked variant takes a $src0
// operand constrained by "$src0 = $dst".
348multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
349                                  dag Outs, dag Ins,
350                                  string OpcodeStr,
351                                  string AttSrcAsm, string IntelSrcAsm,
352                                  list<dag> Pattern> :
353   AVX512_maskable_custom<O, F, Outs, Ins,
354                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
355                          !con((ins _.KRCWM:$mask), Ins),
356                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
357                          "$src0 = $dst">;
358
// Three-source counterpart of AVX512_maskable_in_asm: $src1 is prepended to
// NonTiedIns for every variant (followed by $mask for the masked ones),
// only the unmasked record receives Pattern, and the masking constraint is
// passed as "".
359multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
360                                       dag Outs, dag NonTiedIns,
361                                       string OpcodeStr,
362                                       string AttSrcAsm, string IntelSrcAsm,
363                                       list<dag> Pattern> :
364   AVX512_maskable_custom<O, F, Outs,
365                          !con((ins _.RC:$src1), NonTiedIns),
366                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
367                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
368                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
369                          "">;
370
371// Instruction with mask that puts result in mask register,
372// like "compare" and "vptest"
//
// Defines only two records, NAME (unmasked) and NAME#k (masked); there is
// no zero-masking variant.  Both records share the single IsCommutable bit.
//   Pattern / MaskingPattern - ISel pattern lists for NAME / NAME#k.
//   Other arguments have the same roles as in AVX512_maskable_custom.
373multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
374                                  dag Outs,
375                                  dag Ins, dag MaskingIns,
376                                  string OpcodeStr,
377                                  string AttSrcAsm, string IntelSrcAsm,
378                                  list<dag> Pattern,
379                                  list<dag> MaskingPattern,
380                                  bit IsCommutable = 0> {
381    let isCommutable = IsCommutable in {
    // Unmasked form.
382    def NAME: AVX512<O, F, Outs, Ins,
383                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
384                                     "$dst, "#IntelSrcAsm#"}",
385                       Pattern>;
386
    // Masked form: $dst is printed with a trailing {${mask}}.
387    def NAME#k: AVX512<O, F, Outs, MaskingIns,
388                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
389                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
390                       MaskingPattern>, EVEX_K;
391    }
392}
393
// Wraps AVX512_maskable_custom_cmp, turning result dags into pattern lists.
// The destination is a mask register (_.KRC:$dst):
//   unmasked : (set _.KRC:$dst, RHS)
//   masked   : (set _.KRC:$dst, MaskingRHS)
394multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
395                                  dag Outs,
396                                  dag Ins, dag MaskingIns,
397                                  string OpcodeStr,
398                                  string AttSrcAsm, string IntelSrcAsm,
399                                  dag RHS, dag MaskingRHS,
400                                  bit IsCommutable = 0> :
401  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
402                         AttSrcAsm, IntelSrcAsm,
403                         [(set _.KRC:$dst, RHS)],
404                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
405
// Mask-producing instruction with an unmasked and a masked variant.
//   RHS    - result dag for the unmasked pattern.
//   RHS_su - result dag used inside the masked pattern, which is
//            (and $mask, RHS_su).
//            NOTE(review): the "_su" suffix presumably denotes a single-use
//            variant of the operator - confirm at the call sites.
// The masked input list is (ins $mask) followed by Ins.
406multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
407                           dag Outs, dag Ins, string OpcodeStr,
408                           string AttSrcAsm, string IntelSrcAsm,
409                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
410   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
411                          !con((ins _.KRCWM:$mask), Ins),
412                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
413                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
414
415// Used by conversion instructions.
// The caller supplies complete result dags for all three variants (RHS,
// MaskingRHS, ZeroMaskingRHS); each is wrapped as (set _.RC:$dst, ...).
// The masked variant gets the constraint "$src0 = $dst", so MaskingIns is
// expected to contain a $src0 operand.  The commutable bits are left at
// their defaults.
416multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
417                                  dag Outs,
418                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
419                                  string OpcodeStr,
420                                  string AttSrcAsm, string IntelSrcAsm,
421                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
422  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
423                         AttSrcAsm, IntelSrcAsm,
424                         [(set _.RC:$dst, RHS)],
425                         [(set _.RC:$dst, MaskingRHS)],
426                         [(set _.RC:$dst, ZeroMaskingRHS)],
427                         "$src0 = $dst">;
428
// Three-source maskable instruction with separate dags for the unmasked
// and masked patterns.
//   RHS        - result dag for the unmasked pattern.
//   MaskingRHS - operation dag wrapped in vselect_mask for both the
//                merge-masking pattern (false operand $src1) and the
//                zero-masking pattern (false operand all-zeros).
//   IsCommutable / IsKCommutable - required here (no defaults).
// $src1 is prepended to NonTiedIns, followed by $mask for the masked
// variants.  The masking constraint is passed as "".
429multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
430                               dag Outs, dag NonTiedIns, string OpcodeStr,
431                               string AttSrcAsm, string IntelSrcAsm,
432                               dag RHS, dag MaskingRHS, bit IsCommutable,
433                               bit IsKCommutable> :
434   AVX512_maskable_custom<O, F, Outs,
435                          !con((ins _.RC:$src1), NonTiedIns),
436                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
437                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
438                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
439                          [(set _.RC:$dst, RHS)],
440                          [(set _.RC:$dst,
441                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
442                          [(set _.RC:$dst,
443                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
444                          "", IsCommutable, IsKCommutable>;
445
446// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
447// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
448// swizzled by ExecutionDomainFix to pxor.
449// We set canFoldAsLoad because this can be converted to a constant-pool
450// load of an all-zeros value if folding it would be beneficial.
// AVX512_512_SETALLONES is defined under the same flags and matches the
// all-ones v16i32 vector instead.  Both pseudos take no inputs and write a
// VR512 register.
451let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
452    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
453def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
454               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
455def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
456               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
457}
458
// Select AVX512_512_SET0 for all-zeros vectors of the remaining 512-bit
// types.  v16i32 is not listed because it is matched by the pattern on the
// AVX512_512_SET0 definition itself.
459let Predicates = [HasAVX512] in {
460def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
461def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
462def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
463def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
464def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
465}
466
467// Alias instructions that allow VPTERNLOG to be used with a mask to create
468// a mix of all ones and all zeros elements. This is done this way to force
469// the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask and matches a plain vselect between
// all-ones and all-zeros: the _32 form uses a 16-element mask with v16i32,
// the _64 form an 8-element mask with v8i64.  Both write a VR512 register.
470let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
471def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
472                                (ins VK16WM:$mask), "",
473                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
474                                                      (v16i32 immAllOnesV),
475                                                      (v16i32 immAllZerosV)))]>;
476def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
477                                (ins VK8WM:$mask), "",
478                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
479                                           (v8i64 immAllOnesV),
480                                           (v8i64 immAllZerosV)))]>;
481}
482
// Zero-vector pseudos for the 128-bit (VR128X) and 256-bit (VR256X)
// register classes, defined with the same flags as AVX512_512_SET0.  The
// patterns attached here match v4i32 and v8i32 all-zeros respectively.
483let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
484    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
485def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
486               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
487def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
488               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
489}
490
// Select AVX512_128_SET0 / AVX512_256_SET0 for all-zeros vectors of the
// remaining 128-bit and 256-bit types.  v4i32 and v8i32 are matched by the
// patterns on the pseudo definitions themselves.
491let Predicates = [HasAVX512] in {
492def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
493def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
494def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
495def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
496def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
497def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
498def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
499def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
500def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
501def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
502}
503
504// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
505// This is expanded by ExpandPostRAPseudos.
// These are the HasAVX512-predicated versions: scalar FP zero constants
// materialized into FR32X (f32), FR64X (f64) and VR128X (f128) registers.
506let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
507    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
508  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
509                          [(set FR32X:$dst, fp32imm0)]>;
510  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
511                          [(set FR64X:$dst, fp64imm0)]>;
512  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
513                            [(set VR128X:$dst, fp128imm0)]>;
514}
515
516//===----------------------------------------------------------------------===//
517// AVX-512 - VECTOR INSERT
518//
519
520// Supports two different pattern operators for mask and unmasked ops. Allows
521// null_frag to be passed for one.
//
// Defines the register (rr) and memory (rm) forms of a subvector insert,
// each with unmasked, masked and zero-masked variants via
// AVX512_maskable_split.
//   Opcode           - opcode byte.
//   From             - type info of the inserted subvector.
//   To               - type info of the destination vector.
//   vinsert_insert   - pattern operator for the unmasked pattern.
//   vinsert_for_mask - pattern operator for the masked patterns.
//   sched            - scheduling class; the rm form uses sched.Folded and
//                      sched.ReadAfterFold.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts.
// Operands print as "$src3, $src2, $src1" in AT&T syntax and
// "$src1, $src2, $src3" in Intel syntax, where $src3 is the u8 immediate.
522multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
523                                  X86VectorVTInfo To,
524                                  SDPatternOperator vinsert_insert,
525                                  SDPatternOperator vinsert_for_mask,
526                                  X86FoldableSchedWrite sched> {
527  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: subvector comes from From.RC:$src2.
528    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
529                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
530                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
531                   "$src3, $src2, $src1", "$src1, $src2, $src3",
532                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
533                                         (From.VT From.RC:$src2),
534                                         (iPTR imm)),
535                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
536                                           (From.VT From.RC:$src2),
537                                           (iPTR imm))>,
538                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: subvector is loaded with From.LdFrag from addr:$src2.
    // The compressed-displacement encoding is taken from From (element size
    // and tuple form).
539    let mayLoad = 1 in
540    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
541                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
542                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
543                   "$src3, $src2, $src1", "$src1, $src2, $src3",
544                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
545                               (From.VT (From.LdFrag addr:$src2)),
546                               (iPTR imm)),
547                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
548                               (From.VT (From.LdFrag addr:$src2)),
549                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
550                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
551                   Sched<[sched.Folded, sched.ReadAfterFold]>;
552  }
553}
554
555// Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vinsert_for_size_split with vinsert_insert supplied for
// both operator arguments.
556multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
557                            X86VectorVTInfo To,
558                            SDPatternOperator vinsert_insert,
559                            X86FoldableSchedWrite sched> :
560  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
561
// Adds selection patterns that map a subvector insert on the From/To type
// pair onto already-defined instructions, named InstrStr#"rr" and
// InstrStr#"rm".
//   InstrStr               - base name of the target instruction records.
//   From / To              - subvector and full vector type infos.
//   vinsert_insert         - insert fragment to match.
//   INSERT_get_vinsert_imm - SDNodeXForm applied to the matched insert node
//                            ($ins) to produce the instruction's immediate.
//   p                      - predicate list guarding both patterns.
562multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
563                       X86VectorVTInfo To, PatFrag vinsert_insert,
564                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
565  let Predicates = p in {
    // Register source.
566    def : Pat<(vinsert_insert:$ins
567                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
568              (To.VT (!cast<Instruction>(InstrStr#"rr")
569                     To.RC:$src1, From.RC:$src2,
570                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
571
    // Memory source: the subvector load is folded into the instruction.
572    def : Pat<(vinsert_insert:$ins
573                  (To.VT To.RC:$src1),
574                  (From.VT (From.LdFrag addr:$src2)),
575                  (iPTR imm)),
576              (To.VT (!cast<Instruction>(InstrStr#"rm")
577                  To.RC:$src1, addr:$src2,
578                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
579  }
580}
581
// Instantiates the full family of subvector-insert instructions for one
// element kind (FP or integer).
//   EltVT32 / EltVT64     - 32-bit and 64-bit element types.
//   Opcode128 / Opcode256 - opcode bytes for inserting a 128-bit and a
//                           256-bit subvector respectively.
//   sched                 - scheduling class for all variants.
// Record names are NAME followed by "<eltbits>x<count>" and a Z / Z256
// suffix for a 512-bit / 256-bit destination.
582multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
583                            ValueType EltVT64, int Opcode256,
584                            X86FoldableSchedWrite sched> {
585
  // 4 x 32-bit into a 256-bit vector; requires VLX.
586  let Predicates = [HasVLX] in
587    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
588                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
589                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
590                                 vinsert128_insert, sched>, EVEX_V256;
591
  // 4 x 32-bit into a 512-bit vector; no extra predicate here.
592  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
593                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
594                                 X86VectorVTInfo<16, EltVT32, VR512>,
595                                 vinsert128_insert, sched>, EVEX_V512;
596
  // 4 x 64-bit into a 512-bit vector; no extra predicate here.
597  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
598                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
599                                 X86VectorVTInfo< 8, EltVT64, VR512>,
600                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;
601
602  // Even with DQI we'd like to only use these instructions for masking.
  // Hence null_frag is passed as the unmasked operator.
603  let Predicates = [HasVLX, HasDQI] in
604    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
605                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
606                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
607                                   null_frag, vinsert128_insert, sched>,
608                                   VEX_W1X, EVEX_V256;
609
610  // Even with DQI we'd like to only use these instructions for masking.
  // Hence null_frag is passed as the unmasked operator for both records.
611  let Predicates = [HasDQI] in {
612    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
613                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
614                                 X86VectorVTInfo< 8, EltVT64, VR512>,
615                                 null_frag, vinsert128_insert, sched>,
616                                 VEX_W, EVEX_V512;
617
618    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
619                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
620                                   X86VectorVTInfo<16, EltVT32, VR512>,
621                                   null_frag, vinsert256_insert, sched>,
622                                   EVEX_V512;
623  }
624}
625
// Floating-point (VINSERTF*) and integer (VINSERTI*) subvector inserts.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, /*Opcode128=*/0x18,
                                 f64, /*Opcode256=*/0x1a,
                                 WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, /*Opcode128=*/0x38,
                                 i64, /*Opcode256=*/0x3a,
                                 WriteShuffle256>;
629
// Codegen patterns for the alternative types. Even with AVX512DQ these
// instructions are still the ones used for unmasked operations.
// Template arguments: instruction, From (subvector), To (full vector),
// insert fragment, immediate transform, predicates.

// 64-bit elements, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 64-bit elements, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 32-bit elements, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;

// 16-bit and 8-bit elements, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 16-bit and 8-bit elements, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 16-bit and 8-bit elements, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
662
663
// Additional patterns for handling a bitcast between the vselect and the
// subvector insert: the insert is done in types From/To, while the masked
// select (and therefore the mask register class and the selected
// instruction) works in type Cast.
//   InstrStr               - name prefix of the instruction; "rrk", "rmk",
//                            "rrkz" and "rmkz" are appended.
//   From / To              - type info of the inserted subvector and of the
//                            vector inserted into.
//   Cast                   - type info of the select's result.
//   vinsert_insert         - pattern fragment matching the insert node.
//   INSERT_get_vinsert_imm - transform applied to the matched node ($ins) to
//                            produce the instruction's last operand.
//   p                      - predicates guarding all four patterns.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge masking ($src0 is the pass-through value), register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge masking, memory source. The load is matched directly, exactly as
  // in the zero-masking memory pattern below. From.LdFrag already produces
  // From.VT, so wrapping it in a bitconvert would require a same-type bitcast
  // and keep this pattern from ever folding the load.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero masking (the other select operand is all zeros), register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero masking, memory source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
715
// Masked VEC128-into-VEC256 inserts where the select type (third type
// argument, Cast) differs from the type the insert was done in (From/To).
// The instruction is chosen by the element size of Cast: 32x4 for 32-bit
// masking granularity, 64x2 (DQI) for 64-bit granularity.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Integer, 32-bit mask granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;

// Integer, 64-bit mask granularity. These use the integer-domain
// VINSERTI64x2Z256, matching the 512-bit VINSERTI64x2Z patterns and the
// VEXTRACTI64x2Z256 extract patterns, rather than the floating-point
// VINSERTF64x2Z256.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
741
// Masked inserts into VEC512 where the select type (Cast) differs from the
// type the insert was done in. Template arguments: instruction, From, To,
// Cast, insert fragment, immediate transform, predicates.

// VEC128 into VEC512, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// VEC128 into VEC512, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// VEC256 into VEC512, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// VEC256 into VEC512, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
793
// vinsertps - insert f32 to XMM
// Both forms execute in the packed-single domain; only the register-register
// form is marked commutable.
let ExeDomain = SSEPackedSingle, isCommutable = 1 in
def VINSERTPSZrr
    : AVX512AIi8<0x21, MRMSrcReg,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
                 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR128X:$dst,
                       (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;

// Memory form: the second source is a scalar f32 load placed into a vector.
let ExeDomain = SSEPackedSingle in
def VINSERTPSZrm
    : AVX512AIi8<0x21, MRMSrcMem,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
                 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR128X:$dst,
                       (X86insertps VR128X:$src1,
                                    (v4f32 (scalar_to_vector
                                            (loadf32 addr:$src2))),
                                    timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded,
             SchedWriteFShuffle.XMM.ReadAfterFold]>;
811
812//===----------------------------------------------------------------------===//
813// AVX-512 VECTOR EXTRACT
814//---
815
// Defines the register (rr, plus its masked variants), store (mr) and masked
// store (mrk) forms of one subvector-extract instruction.
// Two pattern operators are taken so that the unmasked form
// (vextract_extract) and the masked forms (vextract_for_mask) can be selected
// from different nodes; null_frag may be passed for either one.
//   Opcode            - instruction opcode.
//   From / To         - type info of the source vector and of the extracted
//                       subvector.
//   SchedRR / SchedMR - scheduling classes of the register and store forms.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Extract into a register.
    defm rr : AVX512_maskable_split<
                  Opcode, MRMDestReg, To,
                  (outs To.RC:$dst),
                  (ins From.RC:$src1, u8imm:$idx),
                  "vextract" # To.EltTypeName # "x" # To.NumElts,
                  "$idx, $src1", "$src1, $idx",
                  (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                  (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
              AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Extract straight to memory.
    def mr : AVX512AIi8<
                 Opcode, MRMDestMem,
                 (outs),
                 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                 "vextract" # To.EltTypeName # "x" # To.NumElts #
                     "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                 [(store (To.VT (vextract_extract:$idx
                                 (From.VT From.RC:$src1), (iPTR imm))),
                         addr:$dst)]>,
             EVEX, Sched<[SchedMR]>;

    // Masked extract to memory. It has no selection pattern, so mayStore is
    // stated explicitly; hasSideEffects = 0 comes from the enclosing let.
    let mayStore = 1 in
    def mrk : AVX512AIi8<
                  Opcode, MRMDestMem,
                  (outs),
                  (ins To.MemOp:$dst, To.KRCWM:$mask,
                       From.RC:$src1, u8imm:$idx),
                  "vextract" # To.EltTypeName # "x" # To.NumElts #
                      "\t{$idx, $src1, $dst {${mask}}|"
                      "$dst {${mask}}, $src1, $idx}",
                  []>,
              EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
852
// Convenience wrapper around vextract_for_size_split for the common case in
// which one pattern operator serves both the unmasked and the masked forms:
// vextract_extract is forwarded for both operator slots.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              vextract_extract, vextract_extract,
                              SchedRR, SchedMR>;
859
// Codegen patterns for the alternative types: map a subvector extract onto
// an already defined instruction, for vector types other than the ones that
// instruction was declared with.
//   InstrStr                 - name prefix of the instruction; "rr" / "mr" is
//                              appended to pick the register / store form.
//   From                     - type info of the vector extracted from.
//   To                       - type info of the extracted subvector.
//   vextract_extract         - pattern fragment matching the extract node.
//   EXTRACT_get_vextract_imm - transform applied to the matched node ($ext)
//                              to produce the instruction's last operand.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_size_lowering<string InstrStr,
                                      X86VectorVTInfo From,
                                      X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  // Result stays in a register.
  let Predicates = p in
  def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
            (To.VT (!cast<Instruction>(InstrStr # "rr")
                       From.RC:$src1,
                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Result is stored: fold the store into the instruction.
  let Predicates = p in
  def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                 (iPTR imm))),
                   addr:$dst),
            (!cast<Instruction>(InstrStr # "mr")
                 addr:$dst, From.RC:$src1,
                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
}
875
// Instantiates the whole family of subvector-extract instructions for one
// element kind. NAME is the mnemonic prefix given at the defm site.
//   EltVT32 / EltVT64 - element types used for the 32xN and 64xN variants.
//   Opcode128         - opcode of the variants producing a VR128X subvector
//                       (32x4, 64x2).
//   Opcode256         - opcode of the variants producing a VR256X subvector
//                       (64x4, 32x8).
//   SchedRR / SchedMR - scheduling classes of the register and store forms.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  // 32x4 from a 512-bit vector.
  let Predicates = [HasAVX512] in
  defm NAME # "32x4Z"
      : vextract_for_size<Opcode128,
                          X86VectorVTInfo<16, EltVT32, VR512>,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          vextract128_extract, SchedRR, SchedMR>,
        EVEX_V512, EVEX_CD8<32, CD8VT4>;

  // 64x4 from a 512-bit vector.
  let Predicates = [HasAVX512] in
  defm NAME # "64x4Z"
      : vextract_for_size<Opcode256,
                          X86VectorVTInfo< 8, EltVT64, VR512>,
                          X86VectorVTInfo< 4, EltVT64, VR256X>,
                          vextract256_extract, SchedRR, SchedMR>,
        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;

  // 32x4 from a 256-bit vector (requires VLX).
  let Predicates = [HasVLX] in
  defm NAME # "32x4Z256"
      : vextract_for_size<Opcode128,
                          X86VectorVTInfo< 8, EltVT32, VR256X>,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          vextract128_extract, SchedRR, SchedMR>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // The remaining variants need DQI. Even with DQI we'd like to only use
  // them for masking, hence null_frag as the unmasked pattern operator.

  // 64x2 from a 256-bit vector (requires VLX as well).
  let Predicates = [HasVLX, HasDQI] in
  defm NAME # "64x2Z256"
      : vextract_for_size_split<Opcode128,
                                X86VectorVTInfo< 4, EltVT64, VR256X>,
                                X86VectorVTInfo< 2, EltVT64, VR128X>,
                                null_frag, vextract128_extract,
                                SchedRR, SchedMR>,
        VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // 64x2 from a 512-bit vector.
  let Predicates = [HasDQI] in
  defm NAME # "64x2Z"
      : vextract_for_size_split<Opcode128,
                                X86VectorVTInfo< 8, EltVT64, VR512>,
                                X86VectorVTInfo< 2, EltVT64, VR128X>,
                                null_frag, vextract128_extract,
                                SchedRR, SchedMR>,
        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

  // 32x8 from a 512-bit vector.
  let Predicates = [HasDQI] in
  defm NAME # "32x8Z"
      : vextract_for_size_split<Opcode256,
                                X86VectorVTInfo<16, EltVT32, VR512>,
                                X86VectorVTInfo< 8, EltVT32, VR256X>,
                                null_frag, vextract256_extract,
                                SchedRR, SchedMR>,
        EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
920
// Floating-point (VEXTRACTF*) and integer (VEXTRACTI*) subvector extracts.
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, /*Opcode128=*/0x19,
                                   f64, /*Opcode256=*/0x1b,
                                   WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, /*Opcode128=*/0x39,
                                   i64, /*Opcode256=*/0x3b,
                                   WriteShuffle256, WriteVecStore>;
924
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// Template arguments: instruction, From (full vector), To (subvector),
// extract fragment, immediate transform, predicates.

// 64-bit elements, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;

// 32-bit elements, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;

// 64-bit elements, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;

// 16-bit and 8-bit elements, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;

// 16-bit and 8-bit elements, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;

// 16-bit and 8-bit elements, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
958
959
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX: take the low 256 bits through the sub_ymm subregister and
// extract their upper half (index 1) with the 128-bit VEXTRACT{F,I}128rr.
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
988
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX: take the low 256 bits through the sub_ymm subregister and
// extract their upper half (index 1) with the 256-bit VEXTRACT{F,I}32x4Z256rr.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1017
1018
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector: the extract is done in types From/To, while the masked
// select (and therefore the mask register class and the selected
// instruction) works in type Cast.
//   InstrStr                 - name prefix of the instruction; "rrk" and
//                              "rrkz" are appended.
//   From / To                - type info of the vector extracted from and of
//                              the extracted subvector.
//   Cast                     - type info of the select's result.
//   vextract_extract         - pattern fragment matching the extract node.
//   EXTRACT_get_vextract_imm - transform applied to the matched node ($ext)
//                              to produce the instruction's last operand.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge masking. The pass-through $src0 is an operand of the Cast-typed
  // select, so it is described with Cast.RC here as well as in the output.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero masking (the other select operand is all zeros).
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1046
// Masked VEC128-from-VEC256 extracts where the select type (Cast) differs
// from the type the extract was done in. Template arguments: instruction,
// From, To, Cast, extract fragment, immediate transform, predicates.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// Integer, 32-bit mask granularity.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasVLX]>;

// Integer, 64-bit mask granularity.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
1072
// Masked extracts from VEC512 where the select type (Cast) differs from the
// type the extract was done in. Template arguments: instruction, From, To,
// Cast, extract fragment, immediate transform, predicates.

// VEC128 from VEC512, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// VEC128 from VEC512, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// VEC256 from VEC512, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
                              v8f64_info, v4f64x_info, v8f32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
                              v16f32_info, v8f32x_info, v4f64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;

// VEC256 from VEC512, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v8i64_info, v4i64x_info, v8i32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v32i16_info, v16i16x_info, v8i32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v64i8_info, v32i8x_info, v8i32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v16i32_info, v8i32x_info, v4i64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v32i16_info, v16i16x_info, v4i64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v64i8_info, v32i8x_info, v4i64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
1124
1125// vextractps - extract 32 bits from XMM
// Register-destination form: $src1 (a v4f32 in VR128X) is reinterpreted as
// v4i32 via bc_v4i32, the element selected by the immediate $src2 is
// extracted with extractelt, and the result is written to a GR32orGR64
// destination.
1126def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1127      (ins VR128X:$src1, u8imm:$src2),
1128      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1129      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1130      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1131
// Memory-destination form of vextractps: same opcode (0x17) and the same
// extractelt-of-bc_v4i32 selection as VEXTRACTPSZrr, but the extracted
// element is stored to addr:$dst (an f32mem operand) instead of being
// written to a register. There are no register outputs.
1132def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1133      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1134      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1135      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1136                          addr:$dst)]>,
1137      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1138
1139//===---------------------------------------------------------------------===//
1140// AVX-512 BROADCAST
1141//---
1142// broadcast with a scalar argument.
// This multiclass defines selection patterns only; it creates no instruction
// records. Each pattern matches an X86VBroadcast whose operand is a scalar
// in SrcInfo.FRC and selects the register-source broadcast instruction named
// Name#DestInfo.ZSuffix#{rr,rrk,rrkz}, after moving the scalar into
// SrcInfo.RC with COPY_TO_REGCLASS.
//   opc, OpcodeStr - accepted but not referenced in this body.
//   Name           - prefix of the instruction records to select.
//   DestInfo       - type info of the broadcast result (and of the mask).
//   SrcInfo        - type info of the scalar source.
1143multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1144                            string Name,
1145                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
      // Unmasked broadcast.
1146  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1147            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1148             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
      // Merge-masked broadcast: unselected lanes come from $src0.
1149  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1150                                       (X86VBroadcast SrcInfo.FRC:$src),
1151                                       DestInfo.RC:$src0)),
1152            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1153             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1154             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
      // Zero-masked broadcast: unselected lanes are zero.
1155  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1156                                       (X86VBroadcast SrcInfo.FRC:$src),
1157                                       DestInfo.ImmAllZerosV)),
1158            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1159             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1160}
1161
1162// Split version to allow mask and broadcast node to be different types. This
1163// helps support the 32x2 broadcasts.
//
// Defines six instruction records: register-source rr/rrkz/rrk and
// memory-source rm/rmkz/rmk (unmasked, zero-masked, merge-masked).
//   opc, OpcodeStr   - opcode byte and assembly mnemonic.
//   Name             - accepted but not referenced in this body.
//   SchedRR, SchedRM - scheduling classes for the register and memory forms.
//   MaskInfo         - type of the instruction result, the mask and the
//                      pass-through operand ($src0).
//   DestInfo         - type produced by the broadcast node; it is bitconverted
//                      to MaskInfo.VT. Also supplies ExeDomain.
//   SrcInfo          - type of the broadcast source (register class, scalar
//                      memory operand, element size, broadcast-load fragment).
//   IsConvertibleToThreeAddress - applied to the rmk record only.
//   UnmaskedOp, UnmaskedBcastOp - operators used in the *unmasked* rr and rm
//                      patterns only. The masked patterns always use
//                      X86VBroadcast / SrcInfo.BroadcastLdFrag, so passing
//                      null_frag here affects just the unmasked forms.
1164multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1165                                     string Name,
1166                                     SchedWrite SchedRR, SchedWrite SchedRM,
1167                                     X86VectorVTInfo MaskInfo,
1168                                     X86VectorVTInfo DestInfo,
1169                                     X86VectorVTInfo SrcInfo,
1170                                     bit IsConvertibleToThreeAddress,
1171                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1172                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
      // This brace-less 'let' covers only the rr record that follows.
1173  let hasSideEffects = 0 in
1174  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1175                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1176                    [(set MaskInfo.RC:$dst,
1177                      (MaskInfo.VT
1178                       (bitconvert
1179                        (DestInfo.VT
1180                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1181                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
      // Zero-masked register form.
1182  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1183                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1184                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1185                       "${dst} {${mask}} {z}, $src}"),
1186                       [(set MaskInfo.RC:$dst,
1187                         (vselect_mask MaskInfo.KRCWM:$mask,
1188                          (MaskInfo.VT
1189                           (bitconvert
1190                            (DestInfo.VT
1191                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1192                          MaskInfo.ImmAllZerosV))],
1193                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
      // Merge-masked register form; the pass-through $src0 is tied to $dst.
1194  let Constraints = "$src0 = $dst" in
1195  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1196                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1197                          SrcInfo.RC:$src),
1198                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1199                     "${dst} {${mask}}, $src}"),
1200                     [(set MaskInfo.RC:$dst,
1201                       (vselect_mask MaskInfo.KRCWM:$mask,
1202                        (MaskInfo.VT
1203                         (bitconvert
1204                          (DestInfo.VT
1205                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1206                        MaskInfo.RC:$src0))],
1207                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1208
      // Unmasked load form. This brace-less 'let' covers only the rm record.
1209  let hasSideEffects = 0, mayLoad = 1 in
1210  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1211                    (ins SrcInfo.ScalarMemOp:$src),
1212                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1213                    [(set MaskInfo.RC:$dst,
1214                      (MaskInfo.VT
1215                       (bitconvert
1216                        (DestInfo.VT
1217                         (UnmaskedBcastOp addr:$src)))))],
1218                    DestInfo.ExeDomain>, T8PD, EVEX,
1219                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1220
      // Zero-masked load form.
1221  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1222                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1223                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1224                       "${dst} {${mask}} {z}, $src}"),
1225                       [(set MaskInfo.RC:$dst,
1226                         (vselect_mask MaskInfo.KRCWM:$mask,
1227                          (MaskInfo.VT
1228                           (bitconvert
1229                            (DestInfo.VT
1230                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1231                          MaskInfo.ImmAllZerosV))],
1232                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1233                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1234
      // Merge-masked load form; the only record that receives
      // IsConvertibleToThreeAddress.
1235  let Constraints = "$src0 = $dst",
1236      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1237  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1238                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1239                          SrcInfo.ScalarMemOp:$src),
1240                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1241                     "${dst} {${mask}}, $src}"),
1242                     [(set MaskInfo.RC:$dst,
1243                       (vselect_mask MaskInfo.KRCWM:$mask,
1244                        (MaskInfo.VT
1245                         (bitconvert
1246                          (DestInfo.VT
1247                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1248                        MaskInfo.RC:$src0))],
1249                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1250                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1251}
1252
1253// Helper multiclass that forces the mask and broadcast result to the same type.
// It forwards to avx512_broadcast_rm_split with DestInfo passed for both the
// MaskInfo and DestInfo parameters, and leaves UnmaskedOp/UnmaskedBcastOp at
// their defaults.
1254multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1255                               SchedWrite SchedRR, SchedWrite SchedRM,
1256                               X86VectorVTInfo DestInfo,
1257                               X86VectorVTInfo SrcInfo,
1258                               bit IsConvertibleToThreeAddress> :
1259  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1260                            DestInfo, DestInfo, SrcInfo,
1261                            IsConvertibleToThreeAddress>;
1262
// Scalar-double broadcast family. Produces a 512-bit variant (Z, HasAVX512)
// and a 256-bit variant (Z256, HasVLX); no 128-bit variant is defined here.
// Each variant combines the instruction records from avx512_broadcast_rm with
// the scalar-register patterns from avx512_broadcast_scalar. The source is
// always the 128-bit info (_.info128) and IsConvertibleToThreeAddress is 1.
1263multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1264                                                       AVX512VLVectorVTInfo _> {
1265  let Predicates = [HasAVX512] in {
1266    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1267                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1268              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1269                                      _.info128>,
1270              EVEX_V512;
1271  }
1272
1273  let Predicates = [HasVLX] in {
1274    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1275                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1276                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1277                                         _.info128>,
1278                 EVEX_V256;
1279  }
1280}
1281
// Scalar-single broadcast family. Same structure as avx512_fp_broadcast_sd,
// plus a 128-bit variant (Z128) under HasVLX. Each variant combines
// avx512_broadcast_rm (instruction records) with avx512_broadcast_scalar
// (scalar-register patterns), always sourcing from _.info128.
// NOTE(review): Z128 uses WriteFShuffle256/WriteFShuffle256Ld, the same
// scheduling classes as the 256- and 512-bit variants — confirm this is
// intentional.
1282multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1283                                                       AVX512VLVectorVTInfo _> {
1284  let Predicates = [HasAVX512] in {
1285    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1286                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1287              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1288                                      _.info128>,
1289              EVEX_V512;
1290  }
1291
1292  let Predicates = [HasVLX] in {
1293    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1294                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1295                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1296                                         _.info128>,
1297                 EVEX_V256;
1298    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1299                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1300                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1301                                         _.info128>,
1302                 EVEX_V128;
1303  }
1304}
// Floating-point scalar broadcasts: vbroadcastss (opcode 0x18, f32 types) and
// vbroadcastsd (opcode 0x19, f64 types, additionally tagged VEX_W1X).
1305defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1306                                       avx512vl_f32_info>;
1307defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1308                                       avx512vl_f64_info>, VEX_W1X;
1309
// Integer broadcast from a general-purpose register. Defines the "rr" family
// through AVX512_maskable with mnemonic "vpbroadcast"#_.Suffix; the pattern
// is (_.VT (OpNode SrcRC:$src)), i.e. the source register class is used
// directly as the instruction operand.
//   opc     - opcode byte.
//   SchedRR - scheduling class for the register form.
//   _       - destination vector type info.
//   OpNode  - broadcast node to match.
//   SrcRC   - source general-purpose register class.
// The final AVX512_maskable argument is vselect; per the comment below this
// lets the mask fold even when the broadcast has multiple uses.
// NOTE(review): AVX512_maskable is defined outside this section — confirm the
// meaning of that trailing argument there.
1310multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1311                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1312                                    RegisterClass SrcRC> {
1313  // Fold with a mask even if it has multiple uses since it is cheap.
1314  let ExeDomain = _.ExeDomain in
1315  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1316                          (ins SrcRC:$src),
1317                          "vpbroadcast"#_.Suffix, "$src", "$src",
1318                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1319                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1320                          T8PD, EVEX, Sched<[SchedRR]>;
1321}
1322
// Byte/word broadcast from a general-purpose register. The instruction
// records take a GR32 operand and carry no selection patterns of their own
// (the three pattern lists passed to AVX512_maskable_custom are empty).
// Selection is done by the separate Pat records below, which widen the
// narrower SrcRC value to i32 by inserting it at Subreg into an IMPLICIT_DEF.
//   opc     - opcode byte.
//   Name    - full record-name prefix, used to look up Name#rr/rrk/rrkz.
//   SchedRR - scheduling class for the register form.
//   _       - destination vector type info.
//   OpNode  - broadcast node to match.
//   SrcRC   - narrow source register class matched in the patterns.
//   Subreg  - sub-register index at which SrcRC sits inside the 32-bit register.
1323multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1324                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1325                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1326  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1327  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1328                         (outs _.RC:$dst), (ins GR32:$src),
1329                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1330                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1331                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1332                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1333
      // Unmasked: the upper bits of the i32 operand are left undefined.
1334  def : Pat <(_.VT (OpNode SrcRC:$src)),
1335             (!cast<Instruction>(Name#rr)
1336              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1337
1338  // Fold with a mask even if it has multiple uses since it is cheap.
1339  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1340             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1341              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1342
      // Zero-masked variant of the pattern above.
1343  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1344             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1345              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1346}
1347
// Vector-length wrapper around avx512_int_broadcastbw_reg. Instantiates the
// 512-bit variant (Z) under [prd] and the 256/128-bit variants (Z256, Z128)
// under [prd, HasVLX]. Name is extended with the matching Z/Z256/Z128 suffix
// so the inner multiclass can look up its own records by name.
// Z and Z256 use WriteShuffle256; Z128 uses WriteShuffle.
1348multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1349                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1350                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1351  let Predicates = [prd] in
1352    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1353              OpNode, SrcRC, Subreg>, EVEX_V512;
1354  let Predicates = [prd, HasVLX] in {
1355    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1356              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1357    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1358              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1359  }
1360}
1361
// Vector-length wrapper around avx512_int_broadcast_reg. Instantiates the
// 512-bit variant (Z) under [prd] and the 256/128-bit variants (Z256, Z128)
// under [prd, HasVLX]. Z and Z256 use WriteShuffle256; Z128 uses WriteShuffle.
1362multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1363                                       SDPatternOperator OpNode,
1364                                       RegisterClass SrcRC, Predicate prd> {
1365  let Predicates = [prd] in
1366    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1367                                      SrcRC>, EVEX_V512;
1368  let Predicates = [prd, HasVLX] in {
1369    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1370                                         SrcRC>, EVEX_V256;
1371    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1372                                         SrcRC>, EVEX_V128;
1373  }
1374}
1375
// Integer broadcasts from general-purpose registers.
// Byte and word forms (opcodes 0x7A/0x7B) require HasBWI and go through the
// "bw" multiclass, which widens GR8/GR16 to a GR32 operand via
// sub_8bit/sub_16bit. The string argument must match the defm name because
// the inner patterns look the records up by name.
// Dword and qword forms share opcode 0x7C and require HasAVX512; the qword
// form is distinguished by VEX_W.
1376defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1377                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1378defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1379                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1380                       HasBWI>;
1381defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1382                                                 X86VBroadcast, GR32, HasAVX512>;
1383defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1384                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1385
// Vector-length wrapper for integer broadcasts whose source is a vector
// register or memory. Instantiates avx512_broadcast_rm for 512 bits (Z, under
// [prd]) and for 256/128 bits (Z256/Z128, under [prd, HasVLX]). The source
// info is always _.info128. IsConvertibleToThreeAddress is forwarded
// unchanged to every variant.
// Z and Z256 use WriteShuffle256/WriteShuffle256Ld; Z128 uses
// WriteShuffle/WriteShuffleXLd.
1386multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1387                                        AVX512VLVectorVTInfo _, Predicate prd,
1388                                        bit IsConvertibleToThreeAddress> {
1389  let Predicates = [prd] in {
1390    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1391                                   WriteShuffle256Ld, _.info512, _.info128,
1392                                   IsConvertibleToThreeAddress>,
1393                                  EVEX_V512;
1394  }
1395  let Predicates = [prd, HasVLX] in {
1396    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1397                                    WriteShuffle256Ld, _.info256, _.info128,
1398                                    IsConvertibleToThreeAddress>,
1399                                 EVEX_V256;
1400    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1401                                    WriteShuffleXLd, _.info128, _.info128,
1402                                    IsConvertibleToThreeAddress>,
1403                                 EVEX_V128;
1404  }
1405}
1406
// Integer broadcasts from a vector register or memory. Byte/word forms
// require HasBWI and pass IsConvertibleToThreeAddress = 0; dword/qword forms
// require HasAVX512 and pass 1. The qword form is additionally tagged VEX_W1X.
1407defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1408                                           avx512vl_i8_info, HasBWI, 0>;
1409defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1410                                           avx512vl_i16_info, HasBWI, 0>;
1411defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1412                                           avx512vl_i32_info, HasAVX512, 1>;
1413defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1414                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1415
// Subvector broadcast from memory. Defines the "rm" family through
// AVX512_maskable, with pattern (_Dst.VT (OpNode addr:$src)).
//   opc, OpcodeStr - opcode byte and assembly mnemonic.
//   OpNode         - subvector broadcast-load node to match.
//   _Dst           - destination vector type info.
//   _Src           - type info of the loaded subvector; only its MemOp is used.
1416multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1417                                      SDPatternOperator OpNode,
1418                                      X86VectorVTInfo _Dst,
1419                                      X86VectorVTInfo _Src> {
1420  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1421                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1422                           (_Dst.VT (OpNode addr:$src))>,
1423                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1424                           AVX5128IBase, EVEX;
1425}
1426
1427// This should be used for the AVX512DQ broadcast instructions. It disables
1428// the unmasked patterns so that we only use the DQ instructions when masking
1429//  is requested.
// Compared with avx512_subvec_broadcast_rm, this uses AVX512_maskable_split
// and passes null_frag as the first pattern and the real broadcast pattern as
// the second. Because the first pattern is null_frag, hasSideEffects = 0 and
// mayLoad = 1 are set explicitly.
// NOTE(review): AVX512_maskable_split is defined outside this section;
// presumably its first pattern is the unmasked one — confirm there.
1430multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1431                                         SDPatternOperator OpNode,
1432                                         X86VectorVTInfo _Dst,
1433                                         X86VectorVTInfo _Src> {
1434  let hasSideEffects = 0, mayLoad = 1 in
1435  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1436                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1437                           (null_frag),
1438                           (_Dst.VT (OpNode addr:$src))>,
1439                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1440                           AVX5128IBase, EVEX;
1441}
1442
1443//===----------------------------------------------------------------------===//
1444// AVX-512 BROADCAST SUBVECTORS
1445//
1446
// 512-bit subvector broadcasts. The 32x4 forms match
// X86SubVBroadcastld128; the 64x4 forms match X86SubVBroadcastld256 and are
// tagged VEX_W. These records sit outside any 'let Predicates' block in this
// section.
1447defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1448                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1449                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1450defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1451                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1452                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1453defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1454                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
1455                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1456defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1457                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
1458                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1459
// Extra selection patterns for the 512-bit subvector broadcasts, available
// with plain HasAVX512.
1460let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector broadcasts of every 512-bit element type select
// the 64x4 instruction (F for floating-point types, I for integer types).
1461def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1462          (VBROADCASTF64X4rm addr:$src)>;
1463def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1464          (VBROADCASTF64X4rm addr:$src)>;
1465def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1466          (VBROADCASTI64X4rm addr:$src)>;
1467def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1468          (VBROADCASTI64X4rm addr:$src)>;
1469def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1470          (VBROADCASTI64X4rm addr:$src)>;
1471def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1472          (VBROADCASTI64X4rm addr:$src)>;
1473
// Unmasked 128-bit subvector broadcasts of every 512-bit element type select
// the 32x4 instruction.
1474def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1475          (VBROADCASTF32X4rm addr:$src)>;
1476def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1477          (VBROADCASTF32X4rm addr:$src)>;
1478def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1479          (VBROADCASTI32X4rm addr:$src)>;
1480def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1481          (VBROADCASTI32X4rm addr:$src)>;
1482def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1483          (VBROADCASTI32X4rm addr:$src)>;
1484def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1485          (VBROADCASTI32X4rm addr:$src)>;
1486
1487// Patterns for selects of bitcasted operations.
// Here the broadcast node was created with 64-bit elements but the select
// operates on 32-bit elements (VK16WM), so the masked 32x4 form is chosen.
1488def : Pat<(vselect_mask VK16WM:$mask,
1489                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1490                        (v16f32 immAllZerosV)),
1491          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1492def : Pat<(vselect_mask VK16WM:$mask,
1493                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1494                        VR512:$src0),
1495          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1496def : Pat<(vselect_mask VK16WM:$mask,
1497                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1498                        (v16i32 immAllZerosV)),
1499          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1500def : Pat<(vselect_mask VK16WM:$mask,
1501                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1502                        VR512:$src0),
1503          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1504
// The converse case: the broadcast node has 32-bit elements but the select
// operates on 64-bit elements (VK8WM), so the masked 64x4 form is chosen.
1505def : Pat<(vselect_mask VK8WM:$mask,
1506                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1507                        (v8f64 immAllZerosV)),
1508          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1509def : Pat<(vselect_mask VK8WM:$mask,
1510                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1511                        VR512:$src0),
1512          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1513def : Pat<(vselect_mask VK8WM:$mask,
1514                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1515                        (v8i64 immAllZerosV)),
1516          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1517def : Pat<(vselect_mask VK8WM:$mask,
1518                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1519                        VR512:$src0),
1520          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1521}
1522
// 256-bit subvector broadcasts (128-bit source), available with HasVLX:
// the instruction definitions followed by extra selection patterns.
1523let Predicates = [HasVLX] in {
1524defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1525                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1526                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1527defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1528                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1529                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1530
// Unmasked 128-bit subvector broadcasts of every 256-bit element type select
// the 32x4 instruction (F for floating-point types, I for integer types).
1531def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1532          (VBROADCASTF32X4Z256rm addr:$src)>;
1533def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1534          (VBROADCASTF32X4Z256rm addr:$src)>;
1535def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1536          (VBROADCASTI32X4Z256rm addr:$src)>;
1537def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1538          (VBROADCASTI32X4Z256rm addr:$src)>;
1539def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1540          (VBROADCASTI32X4Z256rm addr:$src)>;
1541def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1542          (VBROADCASTI32X4Z256rm addr:$src)>;
1543
1544// Patterns for selects of bitcasted operations.
// The broadcast node has 64-bit elements but the select operates on 32-bit
// elements (VK8WM over a 256-bit vector), so the masked 32x4 form is chosen.
1545def : Pat<(vselect_mask VK8WM:$mask,
1546                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1547                        (v8f32 immAllZerosV)),
1548          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1549def : Pat<(vselect_mask VK8WM:$mask,
1550                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1551                        VR256X:$src0),
1552          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1553def : Pat<(vselect_mask VK8WM:$mask,
1554                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1555                        (v8i32 immAllZerosV)),
1556          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1557def : Pat<(vselect_mask VK8WM:$mask,
1558                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1559                        VR256X:$src0),
1560          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1561}
1562
// 256-bit 64x2 subvector broadcasts, requiring both HasVLX and HasDQI. They
// use avx512_subvec_broadcast_rm_dq, so only the masked forms get selection
// patterns.
// NOTE(review): despite the "Z128" suffix, these records are EVEX_V256 with
// 256-bit destination types (v4i64x_info / v4f64x_info); the suffix
// presumably refers to the 128-bit source — confirm before renaming anything.
1563let Predicates = [HasVLX, HasDQI] in {
1564defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1565                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1566                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1567defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1568                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1569                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1570
1571// Patterns for selects of bitcasted operations.
// The broadcast node has 32-bit elements but the select operates on 64-bit
// elements (VK4WM over a 256-bit vector), so the masked 64x2 form is chosen.
1572def : Pat<(vselect_mask VK4WM:$mask,
1573                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1574                        (v4f64 immAllZerosV)),
1575          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1576def : Pat<(vselect_mask VK4WM:$mask,
1577                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1578                        VR256X:$src0),
1579          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1580def : Pat<(vselect_mask VK4WM:$mask,
1581                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1582                        (v4i64 immAllZerosV)),
1583          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1584def : Pat<(vselect_mask VK4WM:$mask,
1585                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1586                        VR256X:$src0),
1587          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1588}
1589
// 512-bit DQ subvector broadcasts (64x2 and 32x8), requiring HasDQI. They use
// avx512_subvec_broadcast_rm_dq, so only the masked forms get selection
// patterns. The 64x2 forms are tagged VEX_W; the 32x8 forms are not.
1590let Predicates = [HasDQI] in {
1591defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1592                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
1593                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1594defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1595                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1596                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1597defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1598                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
1599                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1600defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1601                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1602                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1603
1604// Patterns for selects of bitcasted operations.
// 256-bit broadcast node with 64-bit elements, selected per 32-bit element
// (VK16WM): use the masked 32x8 form.
1605def : Pat<(vselect_mask VK16WM:$mask,
1606                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1607                        (v16f32 immAllZerosV)),
1608          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1609def : Pat<(vselect_mask VK16WM:$mask,
1610                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1611                        VR512:$src0),
1612          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1613def : Pat<(vselect_mask VK16WM:$mask,
1614                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1615                        (v16i32 immAllZerosV)),
1616          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1617def : Pat<(vselect_mask VK16WM:$mask,
1618                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1619                        VR512:$src0),
1620          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1621
// 128-bit broadcast node with 32-bit elements, selected per 64-bit element
// (VK8WM): use the masked 64x2 form.
1622def : Pat<(vselect_mask VK8WM:$mask,
1623                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1624                        (v8f64 immAllZerosV)),
1625          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1626def : Pat<(vselect_mask VK8WM:$mask,
1627                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1628                        VR512:$src0),
1629          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1630def : Pat<(vselect_mask VK8WM:$mask,
1631                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1632                        (v8i64 immAllZerosV)),
1633          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1634def : Pat<(vselect_mask VK8WM:$mask,
1635                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1636                        VR512:$src0),
1637          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1638}
1639
// 32x2 broadcasts, 512-bit (Z, HasDQI) and 256-bit (Z256, HasDQI + HasVLX).
// Note the argument order into avx512_broadcast_rm_split: _Dst supplies
// MaskInfo (the result and mask type), while _Src supplies both DestInfo
// (the broadcast node's type) and SrcInfo (always _Src.info128).
// Both UnmaskedOp and UnmaskedBcastOp are null_frag, so the unmasked rr/rm
// records carry no selection pattern; IsConvertibleToThreeAddress is 0.
1640multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1641                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1642  let Predicates = [HasDQI] in
1643    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1644                                          WriteShuffle256Ld, _Dst.info512,
1645                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1646                                          EVEX_V512;
1647  let Predicates = [HasDQI, HasVLX] in
1648    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1649                                          WriteShuffle256Ld, _Dst.info256,
1650                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1651                                          EVEX_V256;
1652}
1653
// Integer 32x2 broadcasts: inherits the Z and Z256 variants from
// avx512_common_broadcast_32x2 and adds a 128-bit variant (Z128) under
// HasDQI + HasVLX, built the same way (null_frag for both unmasked operators)
// but with WriteShuffle/WriteShuffleXLd scheduling classes.
1654multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1655                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1656  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1657
1658  let Predicates = [HasDQI, HasVLX] in
1659    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1660                                          WriteShuffleXLd, _Dst.info128,
1661                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1662                                          EVEX_V128;
1663}
1664
// 32x2 broadcast instantiations. The integer form uses the i32x2 multiclass
// and therefore also gets a Z128 variant; the floating-point form uses the
// base multiclass and gets only Z and Z256.
1665defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1666                                          avx512vl_i32_info, avx512vl_i64_info>;
1667defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1668                                          avx512vl_f32_info, avx512vl_f64_info>;
1669
1670//===----------------------------------------------------------------------===//
1671// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1672//---
// Defines a single "rr" record that broadcasts a mask register (KRC:$src)
// into a vector register, matching (_.VT (X86VBroadcastm KRC:$src)).
//   opc, OpcodeStr - opcode byte and assembly mnemonic.
//   _              - destination vector type info.
//   KRC            - source mask register class.
1673multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1674                                  X86VectorVTInfo _, RegisterClass KRC> {
1675  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1676                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1677                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1678                  EVEX, Sched<[WriteShuffle]>;
1679}
1680
// Vector-length wrapper around avx512_mask_broadcastm: the 512-bit variant
// (Z) requires HasCDI; the 256- and 128-bit variants (Z256, Z128) require
// HasCDI and HasVLX. The same KRC is used for every vector length.
1681multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1682                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1683  let Predicates = [HasCDI] in
1684    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1685  let Predicates = [HasCDI, HasVLX] in {
1686    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1687    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1688  }
1689}
1690
// Mask-to-vector broadcasts: vpbroadcastmw2d takes a VK16 mask into i32
// vectors (opcode 0x3A); vpbroadcastmb2q takes a VK8 mask into i64 vectors
// (opcode 0x2A) and is tagged VEX_W.
1691defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1692                                               avx512vl_i32_info, VK16>;
1693defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1694                                               avx512vl_i64_info, VK8>, VEX_W;
1695
1696//===----------------------------------------------------------------------===//
1697// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of a VPERMI2* instruction.
//   sched - scheduling class; the memory form uses sched.Folded and
//           sched.ReadAfterFold.
//   _     - VT info of the two data sources and of the result.
//   IdxVT - VT info of the index operand.
// $src1 is tied to $dst and appears as the middle (IdxVT) operand of
// X86VPermt2, so the result overwrites the index register. $src2 and $src3
// are the first and last operands of the node.
1698multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1699                         X86FoldableSchedWrite sched,
1700                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1701let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1702    hasSideEffects = 0 in {
1703  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1704          (ins _.RC:$src2, _.RC:$src3),
1705          OpcodeStr, "$src3, $src2", "$src2, $src3",
1706          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1707          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1708
  // Same operation with $src3 loaded from memory through _.LdFrag.
1709  let mayLoad = 1 in
1710  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1711            (ins _.RC:$src2, _.MemOp:$src3),
1712            OpcodeStr, "$src3, $src2", "$src2, $src3",
1713            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1714                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1715            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1716  }
1717}
1718
// Broadcast-memory (rmb) form of a VPERMI2* instruction. $src3 is a scalar
// memory operand (_.ScalarMemOp) matched through _.BroadcastLdFrag, and the
// asm string appends _.BroadcastStr to it. As in avx512_perm_i, $src1 is tied
// to $dst and is the index operand of X86VPermt2. Encoded with EVEX_B.
1719multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1720                            X86FoldableSchedWrite sched,
1721                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1722  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1723      hasSideEffects = 0, mayLoad = 1 in
1724  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1725              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1726              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1727              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1728              (_.VT (X86VPermt2 _.RC:$src2,
1729               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1730              AVX5128IBase, EVEX_4V, EVEX_B,
1731              Sched<[sched.Folded, sched.ReadAfterFold]>;
1732}
1733
// Instantiates avx512_perm_i plus avx512_perm_i_mb (rr, rm and rmb forms) at
// all three vector widths.
//   VTInfo      - VT info set for the data operands.
//   ShuffleMask - VT info set for the index operand.
// The 512-bit records are named NAME and get no predicate from this
// multiclass; the NAME#128 and NAME#256 records require HasVLX.
1734multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1735                               X86FoldableSchedWrite sched,
1736                               AVX512VLVectorVTInfo VTInfo,
1737                               AVX512VLVectorVTInfo ShuffleMask> {
1738  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1739                           ShuffleMask.info512>,
1740            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1741                             ShuffleMask.info512>, EVEX_V512;
1742  let Predicates = [HasVLX] in {
1743  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1744                               ShuffleMask.info128>,
1745                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1746                                  ShuffleMask.info128>, EVEX_V128;
1747  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1748                               ShuffleMask.info256>,
1749                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1750                                  ShuffleMask.info256>, EVEX_V256;
1751  }
1752}
1753
// Like avx512_perm_i_sizes, but instantiates only avx512_perm_i (rr and rm
// forms; no broadcast-memory form) and gates every width on the caller's
// predicate Prd. The 128-bit and 256-bit records also require HasVLX.
//   Idx - VT info set for the index operand.
1754multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1755                                  X86FoldableSchedWrite sched,
1756                                  AVX512VLVectorVTInfo VTInfo,
1757                                  AVX512VLVectorVTInfo Idx,
1758                                  Predicate Prd> {
1759  let Predicates = [Prd] in
1760  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1761                           Idx.info512>, EVEX_V512;
1762  let Predicates = [Prd, HasVLX] in {
1763  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1764                               Idx.info128>, EVEX_V128;
1765  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1766                               Idx.info256>,  EVEX_V256;
1767  }
1768}
1769
// VPERMI2 instantiations.
//  - D/Q/PS/PD use avx512_perm_i_sizes, so they also get broadcast forms.
//  - W/B use avx512_perm_i_sizes_bw, gated on HasBWI and HasVBMI respectively.
//  - The FP variants (PS/PD) take their index VT info from the same-width
//    integer sets (i32 / i64).
//  - 64-bit element variants and VPERMI2W carry VEX_W.
1770defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1771                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1772defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1773                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1774defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1775                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1776                  VEX_W, EVEX_CD8<16, CD8VF>;
1777defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1778                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1779                  EVEX_CD8<8, CD8VF>;
1780defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1781                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1782defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1783                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1784
1785// Extra patterns to deal with extra bitcasts due to passthru and index being
1786// different types on the fp versions.
//   InstrStr - base instruction name; "rrk", "rmk" or "rmbk" is appended.
//   _        - VT info of the data operands and of the result.
//   IdxVT    - VT info the index is bitconverted to.
//   CastVT   - VT info that $src1 is seen as before the bitconverts.
// Each pattern matches a vselect_mask whose false value is $src1 bitconverted
// to _.VT and whose true value is an X86VPermt2 indexed by the same $src1
// bitconverted to IdxVT.VT, and selects the merge-masked instruction.
1787multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1788                                  X86VectorVTInfo IdxVT,
1789                                  X86VectorVTInfo CastVT> {
  // Register third operand -> "rrk".
1790  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1791                                (X86VPermt2 (_.VT _.RC:$src2),
1792                                            (IdxVT.VT (bitconvert
1793                                                       (CastVT.VT _.RC:$src1))),
1794                                            _.RC:$src3),
1795                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1796            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1797                                                _.RC:$src2, _.RC:$src3)>;
  // Full-vector load as third operand -> "rmk".
1798  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1799                                (X86VPermt2 _.RC:$src2,
1800                                            (IdxVT.VT (bitconvert
1801                                                       (CastVT.VT _.RC:$src1))),
1802                                            (_.LdFrag addr:$src3)),
1803                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1804            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1805                                                _.RC:$src2, addr:$src3)>;
  // Broadcast load as third operand -> "rmbk".
1806  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1807                                 (X86VPermt2 _.RC:$src2,
1808                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1809                                             (_.BroadcastLdFrag addr:$src3)),
1810                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1811            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1812                                                 _.RC:$src2, addr:$src3)>;
1813}
1814
1815// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS variants are instantiated here, each with the vXi64
// type of the same vector width as the cast type.
1816defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1817defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1818defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1819
1820// VPERMT2
// Register (rr) and full-vector memory (rm) forms of a VPERMT2* instruction.
//   _     - VT info of the data operands and of the result.
//   IdxVT - VT info of the index operand.
// $src1 is tied to $dst and is the first data operand of X86VPermt2, so the
// result overwrites a data register. The index is the separate IdxVT.RC:$src2
// input. Built on AVX512_maskable_3src; unlike avx512_perm_i, this multiclass
// sets neither mayLoad nor hasSideEffects itself.
1821multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1822                         X86FoldableSchedWrite sched,
1823                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1824let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1825  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1826          (ins IdxVT.RC:$src2, _.RC:$src3),
1827          OpcodeStr, "$src3, $src2", "$src2, $src3",
1828          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1829          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1830
  // Same operation with $src3 loaded from memory through _.LdFrag.
1831  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1832            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1833            OpcodeStr, "$src3, $src2", "$src2, $src3",
1834            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1835                   (_.LdFrag addr:$src3))), 1>,
1836            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1837  }
1838}
// Broadcast-memory (rmb) form of a VPERMT2* instruction. $src3 is a scalar
// memory operand matched through _.BroadcastLdFrag, and the asm string
// appends _.BroadcastStr to it. $src1 is tied to $dst and is the first data
// operand; $src2 is the index. Encoded with EVEX_B.
1839multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1840                            X86FoldableSchedWrite sched,
1841                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1842  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1843  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1844              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1845              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1846              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1847              (_.VT (X86VPermt2 _.RC:$src1,
1848               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1849              AVX5128IBase, EVEX_4V, EVEX_B,
1850              Sched<[sched.Folded, sched.ReadAfterFold]>;
1851}
1852
// Instantiates avx512_perm_t plus avx512_perm_t_mb (rr, rm and rmb forms) at
// all three vector widths.
//   VTInfo      - VT info set for the data operands.
//   ShuffleMask - VT info set for the index operand.
// The 512-bit records are named NAME and get no predicate from this
// multiclass; the NAME#128 and NAME#256 records require HasVLX.
1853multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1854                               X86FoldableSchedWrite sched,
1855                               AVX512VLVectorVTInfo VTInfo,
1856                               AVX512VLVectorVTInfo ShuffleMask> {
1857  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1858                              ShuffleMask.info512>,
1859            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1860                              ShuffleMask.info512>, EVEX_V512;
1861  let Predicates = [HasVLX] in {
1862  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1863                              ShuffleMask.info128>,
1864                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1865                              ShuffleMask.info128>, EVEX_V128;
1866  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1867                              ShuffleMask.info256>,
1868                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1869                              ShuffleMask.info256>, EVEX_V256;
1870  }
1871}
1872
// Like avx512_perm_t_sizes, but instantiates only avx512_perm_t (rr and rm
// forms; no broadcast-memory form) and gates every width on the caller's
// predicate Prd. The 128-bit and 256-bit records also require HasVLX.
//   Idx - VT info set for the index operand.
1873multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1874                                  X86FoldableSchedWrite sched,
1875                                  AVX512VLVectorVTInfo VTInfo,
1876                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1877  let Predicates = [Prd] in
1878  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1879                           Idx.info512>, EVEX_V512;
1880  let Predicates = [Prd, HasVLX] in {
1881  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1882                               Idx.info128>, EVEX_V128;
1883  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1884                               Idx.info256>, EVEX_V256;
1885  }
1886}
1887
// VPERMT2 instantiations.
//  - D/Q/PS/PD use avx512_perm_t_sizes, so they also get broadcast forms.
//  - W/B use avx512_perm_t_sizes_bw, gated on HasBWI and HasVBMI respectively.
//  - The FP variants (PS/PD) take their index VT info from the same-width
//    integer sets (i32 / i64).
//  - 64-bit element variants and VPERMT2W carry VEX_W.
1888defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1889                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1890defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1891                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1892defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1893                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1894                  VEX_W, EVEX_CD8<16, CD8VF>;
1895defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1896                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1897                  EVEX_CD8<8, CD8VF>;
1898defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1899                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1900defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1901                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1902
1903//===----------------------------------------------------------------------===//
1904// AVX-512 - BLEND using mask
1905//
1906
// Register and full-vector memory forms of the mask-blend instructions:
// rr / rrk / rrkz and rm / rmk / rmkz.
// All six defs have an empty pattern list ([]), so this multiclass provides
// no ISel patterns for them. hasSideEffects is cleared for all; mayLoad is
// set for the memory forms, which also carry EVEX_CD8<_.EltSize, CD8VF>.
// The zero-masking forms (rrkz, rmkz) are marked NotMemoryFoldable.
// NOTE(review): the multiclass name reads like a scheduling-class name;
// presumably a leftover of a mechanical rename - confirm before relying on it.
1907multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1908                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1909  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1910  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1911             (ins _.RC:$src1, _.RC:$src2),
1912             !strconcat(OpcodeStr,
1913             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1914             EVEX_4V, Sched<[sched]>;
1915  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1916             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1917             !strconcat(OpcodeStr,
1918             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1919             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1920  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1921             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1922             !strconcat(OpcodeStr,
1923             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1924             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  // Memory forms: $src2 is a full-vector memory operand (_.MemOp).
1925  let mayLoad = 1 in {
1926  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1927             (ins _.RC:$src1, _.MemOp:$src2),
1928             !strconcat(OpcodeStr,
1929             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1930             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1931             Sched<[sched.Folded, sched.ReadAfterFold]>;
1932  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1933             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1934             !strconcat(OpcodeStr,
1935             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1936             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1937             Sched<[sched.Folded, sched.ReadAfterFold]>;
1938  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1939             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1940             !strconcat(OpcodeStr,
1941             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1942             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1943             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1944  }
1945  }
1946}
// Broadcast-memory forms of the mask-blend instructions: rmbk, rmbkz and rmb.
// $src2 is a scalar memory operand (_.ScalarMemOp); the asm strings append
// _.BroadcastStr to it and the encodings set EVEX_B. All three defs have an
// empty pattern list. The zero-masking form (rmbkz) is NotMemoryFoldable.
1947multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1948                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1949  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1950  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1951      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1952       !strconcat(OpcodeStr,
1953            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1954            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1955      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1956      Sched<[sched.Folded, sched.ReadAfterFold]>;
1957
1958  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1959      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1960       !strconcat(OpcodeStr,
1961            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1962            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1963      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1964      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1965
1966  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1967      (ins _.RC:$src1, _.ScalarMemOp:$src2),
1968       !strconcat(OpcodeStr,
1969            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1970            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1971      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1972      Sched<[sched.Folded, sched.ReadAfterFold]>;
1973  }
1974}
1975
// Mask-blend instantiation that includes the broadcast-memory forms: each
// width combines WriteFVarBlendask and WriteFVarBlendask_rmb.
// The 512-bit form (Z) gets no predicate from this multiclass; Z256 and Z128
// require HasVLX. sched.ZMM / sched.YMM / sched.XMM select the per-width
// scheduling class.
1976multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1977                        AVX512VLVectorVTInfo VTInfo> {
1978  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1979           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1980                                 EVEX_V512;
1981
1982  let Predicates = [HasVLX] in {
1983    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1984                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1985                                      EVEX_V256;
1986    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1987                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1988                                      EVEX_V128;
1989  }
1990}
1991
// Mask-blend instantiation without broadcast-memory forms: each width uses
// WriteFVarBlendask only. The 512-bit form (Z) requires HasBWI; Z256 and
// Z128 require HasBWI and HasVLX.
1992multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1993                        AVX512VLVectorVTInfo VTInfo> {
1994  let Predicates = [HasBWI] in
1995    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1996                               EVEX_V512;
1997
1998  let Predicates = [HasBWI, HasVLX] in {
1999    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2000                                  EVEX_V256;
2001    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2002                                  EVEX_V128;
2003  }
2004}
2005
// Mask-blend instantiations.
//  - FP blends (opcode 0x65) use SchedWriteFVarBlend; integer blends (0x64
//    for D/Q, 0x66 for B/W) use SchedWriteVarBlend.
//  - PS/PD/D/Q go through blendmask_dq (with broadcast forms); B/W go
//    through blendmask_bw (HasBWI, no broadcast forms).
//  - PD, Q and W carry VEX_W.
2006defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2007                              avx512vl_f32_info>;
2008defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2009                              avx512vl_f64_info>, VEX_W;
2010defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2011                              avx512vl_i32_info>;
2012defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2013                              avx512vl_i64_info>, VEX_W;
2014defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2015                              avx512vl_i8_info>;
2016defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2017                              avx512vl_i16_info>, VEX_W;
2018
2019//===----------------------------------------------------------------------===//
2020// Compare Instructions
2021//===----------------------------------------------------------------------===//
2022
2023// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2024
// Scalar FP compare into a mask register (opcode 0xC2, mnemonic
// "vcmp" # _.Suffix, condition code in the u8imm $cc operand).
//   OpNode / OpNodeSAE       - nodes for the unmasked patterns (normal / SAE).
//   OpNode_su / OpNodeSAE_su - fragments passed as the second pattern of
//                              AVX512_maskable_cmp.
// Defines:
//   rr_Int, rm_Int - vector-register forms; SIMD_EXC.
//   rrb_Int        - {sae} form: EVEX_B, Uses = [MXCSR], and no SIMD_EXC.
//   rr, rm         - isCodeGenOnly forms on the scalar class _.FRC; only rr
//                    is marked commutable.
2025multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2026                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2027                             X86FoldableSchedWrite sched> {
2028  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2029                      (outs _.KRC:$dst),
2030                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2031                      "vcmp"#_.Suffix,
2032                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2033                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2034                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2035                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // Memory form: $src2 is matched through _.ScalarIntMemFrags.
2036  let mayLoad = 1 in
2037  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2038                    (outs _.KRC:$dst),
2039                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2040                    "vcmp"#_.Suffix,
2041                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2042                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2043                        timm:$cc),
2044                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2045                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2046                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2047
  // {sae} register form, matched from the SAE nodes.
2048  let Uses = [MXCSR] in
2049  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2050                     (outs _.KRC:$dst),
2051                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2052                     "vcmp"#_.Suffix,
2053                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2054                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2055                                timm:$cc),
2056                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2057                                   timm:$cc)>,
2058                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2059
  // Codegen-only forms operating on the scalar register class _.FRC.
2060  let isCodeGenOnly = 1 in {
2061    let isCommutable = 1 in
2062    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2063                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2064                !strconcat("vcmp", _.Suffix,
2065                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2066                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2067                                          _.FRC:$src2,
2068                                          timm:$cc))]>,
2069                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2070    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2071              (outs _.KRC:$dst),
2072              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2073              !strconcat("vcmp", _.Suffix,
2074                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2075              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2076                                        (_.ScalarLdFrag addr:$src2),
2077                                        timm:$cc))]>,
2078              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2079              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2080  }
2081}
2082
// X86cmpms restricted to nodes with exactly one use (N->hasOneUse()).
2083def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2084                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
2085  return N->hasOneUse();
2086}]>;
// X86cmpmsSAE restricted to nodes with exactly one use (N->hasOneUse()).
2087def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2088                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2089  return N->hasOneUse();
2090}]>;
2091
// Scalar compare instantiations, both gated on HasAVX512 and both using
// SchedWriteFCmp.Scl:
//   VCMPSSZ - f32x_info, SSEPackedSingle domain, AVX512XSIi8Base.
//   VCMPSDZ - f64x_info, SSEPackedDouble domain, AVX512XDIi8Base, VEX_W.
2092let Predicates = [HasAVX512] in {
2093  let ExeDomain = SSEPackedSingle in
2094  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2095                                   X86cmpms_su, X86cmpmsSAE_su,
2096                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2097  let ExeDomain = SSEPackedDouble in
2098  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2099                                   X86cmpms_su, X86cmpmsSAE_su,
2100                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2101}
2102
// Packed integer compare into a mask register: rr, rm, rrk and rmk forms.
//   _            - VT info of the compared vectors; the result is _.KRC.
//   IsCommutable - applied to the register forms (rr, rrk) only.
// All four defs have an empty pattern list, so this multiclass provides no
// ISel patterns for them. The masked forms take the write-mask as
// _.KRCWM:$mask.
2103multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2104                              X86FoldableSchedWrite sched,
2105                              X86VectorVTInfo _, bit IsCommutable> {
2106  let isCommutable = IsCommutable, hasSideEffects = 0 in
2107  def rr : AVX512BI<opc, MRMSrcReg,
2108             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2109             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2110             []>, EVEX_4V, Sched<[sched]>;
2111  let mayLoad = 1, hasSideEffects = 0 in
2112  def rm : AVX512BI<opc, MRMSrcMem,
2113             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2114             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2115             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2116  let isCommutable = IsCommutable, hasSideEffects = 0 in
2117  def rrk : AVX512BI<opc, MRMSrcReg,
2118              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2119              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2120                          "$dst {${mask}}, $src1, $src2}"),
2121              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2122  let mayLoad = 1, hasSideEffects = 0 in
2123  def rmk : AVX512BI<opc, MRMSrcMem,
2124              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2125              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2126                          "$dst {${mask}}, $src1, $src2}"),
2127              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2128}
2129
// Extends avx512_icmp_packed (inherited via the ':' base list) with the
// broadcast-memory forms rmb and rmbk. $src2 is a scalar memory operand
// (_.ScalarMemOp); the asm strings append _.BroadcastStr and the encodings
// set EVEX_B. Both defs have an empty pattern list.
2130multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2131                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2132                                  bit IsCommutable> :
2133           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2134  let mayLoad = 1, hasSideEffects = 0 in {
2135  def rmb : AVX512BI<opc, MRMSrcMem,
2136              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2137              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2138                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2139              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2140  def rmbk : AVX512BI<opc, MRMSrcMem,
2141               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2142                                       _.ScalarMemOp:$src2),
2143               !strconcat(OpcodeStr,
2144                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2145                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2146               []>, EVEX_4V, EVEX_K, EVEX_B,
2147               Sched<[sched.Folded, sched.ReadAfterFold]>;
2148  }
2149}
2150
// Instantiates avx512_icmp_packed (no broadcast forms) at 512/256/128 bits.
//   prd          - predicate required by every width; Z256 and Z128 also
//                  require HasVLX.
//   IsCommutable - forwarded to avx512_icmp_packed; defaults to 0.
2151multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2152                                 X86SchedWriteWidths sched,
2153                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2154                                 bit IsCommutable = 0> {
2155  let Predicates = [prd] in
2156  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2157                              VTInfo.info512, IsCommutable>, EVEX_V512;
2158
2159  let Predicates = [prd, HasVLX] in {
2160    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2161                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2162    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2163                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2164  }
2165}
2166
// Instantiates avx512_icmp_packed_rmb (including broadcast forms) at
// 512/256/128 bits.
//   prd          - predicate required by every width; Z256 and Z128 also
//                  require HasVLX.
//   IsCommutable - forwarded to avx512_icmp_packed_rmb; defaults to 0.
2167multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2168                                     X86SchedWriteWidths sched,
2169                                     AVX512VLVectorVTInfo VTInfo,
2170                                     Predicate prd, bit IsCommutable = 0> {
2171  let Predicates = [prd] in
2172  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2173                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2174
2175  let Predicates = [prd, HasVLX] in {
2176    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2177                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2178    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2179                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2180  }
2181}
2182
2183// This node treats ISD::SETCC as commutable (SDNPCommutative) to help match
2184// loads in both operands for PCMPEQ.
2185def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Matches a setcc whose condition code is SETGT.
2186def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2187                         (setcc node:$src1, node:$src2, SETGT)>;
2188
2189// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2190// increase the pattern complexity the way an immediate would.
// Instantiations in this block:
//  - B/W variants use avx512_icmp_packed_vl (no broadcast forms) under HasBWI
//    and carry VEX_WIG.
//  - D/Q variants use avx512_icmp_packed_rmb_vl under HasAVX512; the Q
//    variants add T8PD and VEX_W.
//  - The EQ variants pass IsCommutable = 1; the GT variants leave it at the
//    default of 0.
2191let AddedComplexity = 2 in {
2192// FIXME: Is there a better scheduler class for VPCMP?
2193defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2194                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2195                EVEX_CD8<8, CD8VF>, VEX_WIG;
2196
2197defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2198                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2199                EVEX_CD8<16, CD8VF>, VEX_WIG;
2200
2201defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2202                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2203                EVEX_CD8<32, CD8VF>;
2204
2205defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2206                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2207                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2208
2209defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2210                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2211                EVEX_CD8<8, CD8VF>, VEX_WIG;
2212
2213defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2214                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2215                EVEX_CD8<16, CD8VF>, VEX_WIG;
2216
2217defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2218                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2219                EVEX_CD8<32, CD8VF>;
2220
2221defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2222                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2223                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2224}
2225
// Transform on a setcc node: reads the condition code from operand 2, maps
// it with X86::getVPCMPImmForCond and returns the result as an i8 immediate.
2226def X86pcmpm_imm : SDNodeXForm<setcc, [{
2227  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2228  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2229  return getI8Imm(SSECC, SDLoc(N));
2230}]>;
2231
2232// Swapped operand version of the above.
// Computes the same immediate as X86pcmpm_imm, then passes it through
// X86::getSwappedVPCMPImm before returning it as an i8 immediate.
2233def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2234  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2235  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2236  SSECC = X86::getSwappedVPCMPImm(SSECC);
2237  return getI8Imm(SSECC, SDLoc(N));
2238}]>;
2239
// Packed integer compare with an immediate condition code
// (mnemonic "vpcmp" # Suffix, result in a mask register).
//   Frag    - fragment used by the unmasked patterns.
//   Frag_su - fragment used by the masked patterns (the compare is and'ed
//             with _.KRCWM:$mask).
//   _       - VT info of the compared vectors.
//   Name    - base record name; the extra patterns below look up
//             Name # _.ZSuffix # "rmi" / "rmik".
// Defines rri, rmi, rrik and rmik (only the register forms are commutable),
// plus two anonymous patterns for a load in the first operand.
2240multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2241                          PatFrag Frag_su,
2242                          X86FoldableSchedWrite sched,
2243                          X86VectorVTInfo _, string Name> {
2244  let isCommutable = 1 in
2245  def rri : AVX512AIi8<opc, MRMSrcReg,
2246             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2247             !strconcat("vpcmp", Suffix,
2248                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2249             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2250                                                (_.VT _.RC:$src2),
2251                                                cond)))]>,
2252             EVEX_4V, Sched<[sched]>;
2253  def rmi : AVX512AIi8<opc, MRMSrcMem,
2254             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2255             !strconcat("vpcmp", Suffix,
2256                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2257             [(set _.KRC:$dst, (_.KVT
2258                                (Frag:$cc
2259                                 (_.VT _.RC:$src1),
2260                                 (_.VT (_.LdFrag addr:$src2)),
2261                                 cond)))]>,
2262             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2263  let isCommutable = 1 in
2264  def rrik : AVX512AIi8<opc, MRMSrcReg,
2265              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2266                                      u8imm:$cc),
2267              !strconcat("vpcmp", Suffix,
2268                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2269                         "$dst {${mask}}, $src1, $src2, $cc}"),
2270              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2271                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2272                                                         (_.VT _.RC:$src2),
2273                                                         cond))))]>,
2274              EVEX_4V, EVEX_K, Sched<[sched]>;
2275  def rmik : AVX512AIi8<opc, MRMSrcMem,
2276              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2277                                    u8imm:$cc),
2278              !strconcat("vpcmp", Suffix,
2279                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2280                         "$dst {${mask}}, $src1, $src2, $cc}"),
2281              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2282                                     (_.KVT
2283                                      (Frag_su:$cc
2284                                       (_.VT _.RC:$src1),
2285                                       (_.VT (_.LdFrag addr:$src2)),
2286                                       cond))))]>,
2287              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2288
  // Load in the first compare operand: select the rmi form with the register
  // as $src1 and the immediate produced by X86pcmpm_imm_commute.
2289  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2290                             (_.VT _.RC:$src1), cond)),
2291            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2292             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2293
  // Masked variant of the pattern above, selecting the rmik form.
2294  def : Pat<(and _.KRCWM:$mask,
2295                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2296                                     (_.VT _.RC:$src1), cond))),
2297            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2298             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2299             (X86pcmpm_imm_commute $cc))>;
2300}
2301
// Extends avx512_icmp_cc (inherited below, so its records are emitted too)
// with the embedded-broadcast memory forms of the integer compare:
//   rmib  - $src1 compared against a broadcast-loaded $src2 (EVEX_B).
//   rmibk - the same compare using Frag_su, ANDed with $mask (EVEX_K, EVEX_B).
// The two anonymous patterns at the end match a broadcast load appearing as
// the *first* compare operand; they select the same rmib/rmibk instructions
// with the register kept as $src1 and the condition rewritten through
// X86pcmpm_imm_commute.
// Name is the instruction-name prefix used to look the instructions up via
// !cast<Instruction>(Name # _.ZSuffix # ...).
2302multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2303                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2304                              X86VectorVTInfo _, string Name> :
2305           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2306  def rmib : AVX512AIi8<opc, MRMSrcMem,
2307             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2308                                     u8imm:$cc),
2309             !strconcat("vpcmp", Suffix,
2310                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2311                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2312             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2313                                       (_.VT _.RC:$src1),
2314                                       (_.BroadcastLdFrag addr:$src2),
2315                                       cond)))]>,
2316             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2317  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2318              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2319                                       _.ScalarMemOp:$src2, u8imm:$cc),
2320              !strconcat("vpcmp", Suffix,
2321                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2322                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2323              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2324                                     (_.KVT (Frag_su:$cc
2325                                             (_.VT _.RC:$src1),
2326                                             (_.BroadcastLdFrag addr:$src2),
2327                                             cond))))]>,
2328              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2329
2330  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2331                    (_.VT _.RC:$src1), cond)),
2332            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2333             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2334
2335  def : Pat<(and _.KRCWM:$mask,
2336                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2337                                     (_.VT _.RC:$src1), cond))),
2338            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2339             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2340             (X86pcmpm_imm_commute $cc))>;
2341}
2342
// Instantiates avx512_icmp_cc (no broadcast forms) once per vector width of
// VTInfo:
//   Z    - info512 / sched.ZMM, guarded by [prd].
//   Z256 - info256 / sched.YMM, guarded by [prd, HasVLX].
//   Z128 - info128 / sched.XMM, guarded by [prd, HasVLX].
// NAME is forwarded so the inner multiclass can !cast its own instructions.
2343multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2344                             PatFrag Frag_su, X86SchedWriteWidths sched,
2345                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2346  let Predicates = [prd] in
2347  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2348                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2349
2350  let Predicates = [prd, HasVLX] in {
2351    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2352                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2353    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2354                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2355  }
2356}
2357
// Same width fan-out as avx512_icmp_cc_vl, but built on avx512_icmp_cc_rmb so
// each width also gets the broadcast-memory (rmib/rmibk) forms:
//   Z    - info512 / sched.ZMM, guarded by [prd].
//   Z256 - info256 / sched.YMM, guarded by [prd, HasVLX].
//   Z128 - info128 / sched.XMM, guarded by [prd, HasVLX].
2358multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2359                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2360                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2361  let Predicates = [prd] in
2362  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2363                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2364
2365  let Predicates = [prd, HasVLX] in {
2366    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2367                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2368    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2369                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2370  }
2371}
2372
// Matches a setcc whose condition code is NOT an unsigned integer comparison
// (i.e. !ISD::isUnsignedIntSetCC). X86pcmpm_imm is supplied as the fragment's
// operand transform.
2373def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2374                       (setcc node:$src1, node:$src2, node:$cc), [{
2375  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2376  return !ISD::isUnsignedIntSetCC(CC);
2377}], X86pcmpm_imm>;
2378
// Single-use variant of the non-unsigned setcc fragment: matches only when the
// setcc node has exactly one use AND its condition code is not an unsigned
// integer comparison. X86pcmpm_imm is supplied as the operand transform.
2379def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2380                          (setcc node:$src1, node:$src2, node:$cc), [{
2381  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2382  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2383}], X86pcmpm_imm>;
2384
// Matches a setcc whose condition code IS an unsigned integer comparison
// (ISD::isUnsignedIntSetCC). X86pcmpm_imm is supplied as the fragment's
// operand transform.
2385def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2386                        (setcc node:$src1, node:$src2, node:$cc), [{
2387  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2388  return ISD::isUnsignedIntSetCC(CC);
2389}], X86pcmpm_imm>;
2390
// Single-use variant of the unsigned setcc fragment: matches only when the
// setcc node has exactly one use AND its condition code is an unsigned integer
// comparison. X86pcmpm_imm is supplied as the operand transform.
2391def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2392                           (setcc node:$src1, node:$src2, node:$cc), [{
2393  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2394  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2395}], X86pcmpm_imm>;
2396
// Integer compare-into-mask instantiations.
//  * VPCMP{B,W,D,Q} (opcodes 0x3F / 0x1F) use the X86pcmpm / X86pcmpm_su
//    fragments, i.e. condition codes that are not unsigned comparisons.
//  * VPCMPU{B,W,D,Q} (opcodes 0x3E / 0x1E) use X86pcmpum / X86pcmpum_su,
//    i.e. unsigned condition codes.
//  * Byte and word element forms are gated on HasBWI and are built without
//    broadcast-memory variants (avx512_icmp_cc_vl).
//  * Dword and qword element forms are gated on HasAVX512 and include the
//    broadcast-memory variants (avx512_icmp_cc_rmb_vl).
//  * Word and qword forms additionally carry VEX_W.
2397// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2398defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2399                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2400                                EVEX_CD8<8, CD8VF>;
2401defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2402                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2403                                 EVEX_CD8<8, CD8VF>;
2404
2405defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2406                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2407                                VEX_W, EVEX_CD8<16, CD8VF>;
2408defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2409                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2410                                 VEX_W, EVEX_CD8<16, CD8VF>;
2411
2412defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2413                                    SchedWriteVecALU, avx512vl_i32_info,
2414                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2415defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2416                                     SchedWriteVecALU, avx512vl_i32_info,
2417                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2418
2419defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2420                                    SchedWriteVecALU, avx512vl_i64_info,
2421                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2422defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2423                                     SchedWriteVecALU, avx512vl_i64_info,
2424                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2425
// X86cmpm restricted to nodes that have exactly one use.
2426def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2427                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
2428  return N->hasOneUse();
2429}]>;
2430
// Immediate transform applied when the two compare operands are swapped:
// keeps only the low 5 bits of the target immediate, maps that value through
// X86::getSwappedVCMPImm, and re-emits the result as an i8 immediate.
2431def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2432  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2433  return getI8Imm(Imm, SDLoc(N));
2434}]>;
2435
// Packed floating-point compare-into-mask (opcode 0xC2) for one vector type.
// Instructions, all defined with Uses = [MXCSR] and mayRaiseFPException = 1:
//   rri  - register / register.
//   rmi  - register / full-vector load.
//   rmbi - register / broadcast load (EVEX_B).
// Each goes through AVX512_maskable_cmp, which receives an unmasked pattern
// (X86any_cmpm) and a masked one (X86cmpm_su, the single-use form).
// NOTE(review): the trailing `1` passed for rri is presumably the
// IsCommutable flag of AVX512_maskable_cmp - confirm against its definition.
//
// The anonymous patterns that follow cover:
//   * a load / broadcast load in the FIRST operand: the register stays $src1
//     and the immediate is rewritten with X86cmpm_imm_commute;
//   * the X86cmpmm node in reg, load and broadcast forms, with an all-ones
//     mask (unmasked instruction) or an explicit $mask (k-suffixed one);
//   * X86cmpmm with the load in the first operand, again commuting the
//     immediate.
// Name is the prefix used to !cast the instructions defined above.
2436multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2437                              string Name> {
2438let Uses = [MXCSR], mayRaiseFPException = 1 in {
2439  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2440                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2441                   "vcmp"#_.Suffix,
2442                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2443                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2444                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2445                   1>, Sched<[sched]>;
2446
2447  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2448                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2449                "vcmp"#_.Suffix,
2450                "$cc, $src2, $src1", "$src1, $src2, $cc",
2451                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2452                             timm:$cc),
2453                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2454                            timm:$cc)>,
2455                Sched<[sched.Folded, sched.ReadAfterFold]>;
2456
2457  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2458                (outs _.KRC:$dst),
2459                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2460                "vcmp"#_.Suffix,
2461                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2462                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2463                (X86any_cmpm (_.VT _.RC:$src1),
2464                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2465                             timm:$cc),
2466                (X86cmpm_su (_.VT _.RC:$src1),
2467                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2468                            timm:$cc)>,
2469                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2470  }
2471
2472  // Patterns for selecting with loads in other operand.
2473  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2474                         timm:$cc),
2475            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2476                                                      (X86cmpm_imm_commute timm:$cc))>;
2477
2478  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2479                                            (_.VT _.RC:$src1),
2480                                            timm:$cc)),
2481            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2482                                                       _.RC:$src1, addr:$src2,
2483                                                       (X86cmpm_imm_commute timm:$cc))>;
2484
2485  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2486                         (_.VT _.RC:$src1), timm:$cc),
2487            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2488                                                       (X86cmpm_imm_commute timm:$cc))>;
2489
2490  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2491                                            (_.VT _.RC:$src1),
2492                                            timm:$cc)),
2493            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2494                                                        _.RC:$src1, addr:$src2,
2495                                                        (X86cmpm_imm_commute timm:$cc))>;
2496
2497  // Patterns for mask intrinsics.
2498  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2499                      (_.KVT immAllOnesV)),
2500            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2501
2502  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2503            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2504                                                       _.RC:$src2, timm:$cc)>;
2505
2506  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2507                      (_.KVT immAllOnesV)),
2508            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2509
2510  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2511                      _.KRCWM:$mask),
2512            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2513                                                       addr:$src2, timm:$cc)>;
2514
2515  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2516                      (_.KVT immAllOnesV)),
2517            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2518
2519  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2520                      _.KRCWM:$mask),
2521            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2522                                                        addr:$src2, timm:$cc)>;
2523
2524  // Patterns for mask intrinsics with loads in other operand.
2525  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2526                      (_.KVT immAllOnesV)),
2527            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2528                                                      (X86cmpm_imm_commute timm:$cc))>;
2529
2530  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2531                      _.KRCWM:$mask),
2532            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2533                                                       _.RC:$src1, addr:$src2,
2534                                                       (X86cmpm_imm_commute timm:$cc))>;
2535
2536  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2537                      (_.KVT immAllOnesV)),
2538            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2539                                                       (X86cmpm_imm_commute timm:$cc))>;
2540
2541  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2542                      _.KRCWM:$mask),
2543            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2544                                                        _.RC:$src1, addr:$src2,
2545                                                        (X86cmpm_imm_commute  timm:$cc))>;
2546}
2547
// {sae} register/register form of the packed FP compare (opcode 0xC2, EVEX_B).
// Built with AVX512_maskable_custom_cmp, which is given two operand lists and
// two pattern lists: the first pattern matches X86cmpmmSAE with an all-ones
// mask, the second matches it with an explicit $mask operand.
// Only Uses = [MXCSR] is set here; mayRaiseFPException is not.
2548multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2549  // comparison code form (VCMP[EQ/LT/LE/...])
2550  let Uses = [MXCSR] in
2551  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2552                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2553                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2554                     "vcmp"#_.Suffix,
2555                     "$cc, {sae}, $src2, $src1",
2556                     "$src1, $src2, {sae}, $cc",
2557                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2558                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2559                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2560                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2561                     EVEX_B, Sched<[sched]>;
2562}
2563
// Width fan-out for the packed FP compares:
//   Z    - info512, gets both the common forms and the {sae} form; HasAVX512.
//   Z128 - info128, common forms only; HasAVX512 + HasVLX.
//   Z256 - info256, common forms only; HasAVX512 + HasVLX.
2564multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2565  let Predicates = [HasAVX512] in {
2566    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2567                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2568
2569  }
2570  let Predicates = [HasAVX512,HasVLX] in {
2571   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2572   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2573  }
2574}
2575
// Packed FP compare instantiations, both scheduled as SchedWriteFCmp:
//   VCMPPD - f64 vectors, AVX512PDIi8Base, 64-bit CD8 scaling, VEX_W.
//   VCMPPS - f32 vectors, AVX512PSIi8Base, 32-bit CD8 scaling.
2576defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2577                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2578defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2579                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2580
2580// Patterns to select fp compares with load as first operand.
// The scalar X86cmpms node is matched with the f64/f32 load in operand 0; the
// rm instruction is emitted with the register as $src1, the address as $src2,
// and the immediate rewritten by X86cmpm_imm_commute to account for the swap.
2581let Predicates = [HasAVX512] in {
2582  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2583                            timm:$cc)),
2584            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2585
2586  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2587                            timm:$cc)),
2588            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2589}
2591
2592// ----------------------------------------------------------------
2593// FPClass
2594
// X86Vfpclasss (scalar fpclass node) restricted to nodes with exactly one use.
2594def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2595                              (X86Vfpclasss node:$src1, node:$src2), [{
2596  return N->hasOneUse();
2597}]>;
2599
// X86Vfpclass (vector fpclass node) restricted to nodes with exactly one use.
2599def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2600                             (X86Vfpclass node:$src1, node:$src2), [{
2601  return N->hasOneUse();
2602}]>;
2604
2604// Scalar fpclass instruction:  mask = op(reg_scalar, imm)
2605//                              mask = op(mem_scalar, imm)
// Defines four records, all under Predicates = [prd], the type's ExeDomain and
// Uses = [MXCSR]:
//   rr  - register source, unmasked (X86Vfpclasss).
//   rrk - register source, result ANDed with $mask (X86Vfpclasss_su, EVEX_K).
//   rm  - memory source via _.IntScalarMemOp / _.ScalarIntMemFrags, unmasked.
//   rmk - memory source, result ANDed with $mask (EVEX_K).
// $src2 is the i32u8imm class-selection immediate in every form.
2606multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2607                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2608                                 Predicate prd> {
2609  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2610      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2611                      (ins _.RC:$src1, i32u8imm:$src2),
2612                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2613                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2614                              (i32 timm:$src2)))]>,
2615                      Sched<[sched]>;
2616      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2617                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2618                      OpcodeStr#_.Suffix#
2619                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2620                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2621                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2622                                      (i32 timm:$src2))))]>,
2623                      EVEX_K, Sched<[sched]>;
2624    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2625                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2626                    OpcodeStr#_.Suffix#
2627                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2628                    [(set _.KRC:$dst,
2629                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2630                                        (i32 timm:$src2)))]>,
2631                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2632    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2633                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2634                    OpcodeStr#_.Suffix#
2635                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2636                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2637                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2638                            (i32 timm:$src2))))]>,
2639                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2640  }
2641}
2643
2643// Vector fpclass instruction:  mask = fpclass(reg_vec, imm)
2644//                              mask = fpclass(mem_vec, imm)
2645//                              mask = fpclass(broadcast(eltVt), imm)
// Each source kind comes in an unmasked form and a k-suffixed form whose
// result is ANDed with $mask (single-use X86Vfpclass_su, EVEX_K):
//   rr  / rrk  - register source.
//   rm  / rmk  - full-vector load; the mnemonic gets an optional "{mem}"
//                suffix appended.
//   rmb / rmbk - broadcast load (EVEX_B).
// All six are defined with the type's ExeDomain and Uses = [MXCSR].
// `mem` is the width letter passed by the caller; it is also used by the
// AT&T-only InstAliases at the end of the multiclass.
2646multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2647                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2648                                 string mem>{
2649  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2650  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2651                      (ins _.RC:$src1, i32u8imm:$src2),
2652                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2653                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2654                                       (i32 timm:$src2)))]>,
2655                      Sched<[sched]>;
2656  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2657                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2658                      OpcodeStr#_.Suffix#
2659                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2660                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2661                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2662                                       (i32 timm:$src2))))]>,
2663                      EVEX_K, Sched<[sched]>;
2664  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2665                    (ins _.MemOp:$src1, i32u8imm:$src2),
2666                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2667                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2668                    [(set _.KRC:$dst,(X86Vfpclass
2669                                     (_.VT (_.LdFrag addr:$src1)),
2670                                     (i32 timm:$src2)))]>,
2671                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2672  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2673                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2674                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2675                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2676                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2677                                  (_.VT (_.LdFrag addr:$src1)),
2678                                  (i32 timm:$src2))))]>,
2679                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2680  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2681                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2682                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2683                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2684                                                  #_.BroadcastStr#", $src2}",
2685                    [(set _.KRC:$dst,(X86Vfpclass
2686                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2687                                     (i32 timm:$src2)))]>,
2688                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2689  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2690                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2691                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2692                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2693                                                   _.BroadcastStr#", $src2}",
2694                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2695                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2696                                     (i32 timm:$src2))))]>,
2697                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2698  }
2699
2700  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2701  // the memory form.
2702  def : InstAlias<OpcodeStr#_.Suffix#mem#
2703                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2704                  (!cast<Instruction>(NAME#"rr")
2705                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2706  def : InstAlias<OpcodeStr#_.Suffix#mem#
2707                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2708                  (!cast<Instruction>(NAME#"rrk")
2709                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2710  def : InstAlias<OpcodeStr#_.Suffix#mem#
2711                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2712                  _.BroadcastStr#", $src2}",
2713                  (!cast<Instruction>(NAME#"rmb")
2714                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2715  def : InstAlias<OpcodeStr#_.Suffix#mem#
2716                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2717                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2718                  (!cast<Instruction>(NAME#"rmbk")
2719                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2720}
2722
// Width fan-out for the vector fpclass instruction. The string passed as the
// last argument is the memory-form disambiguation suffix for that width:
//   Z    - info512 / sched.ZMM, suffix "z", guarded by [prd].
//   Z128 - info128 / sched.XMM, suffix "x", guarded by [prd, HasVLX].
//   Z256 - info256 / sched.YMM, suffix "y", guarded by [prd, HasVLX].
2722multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2723                                     bits<8> opc, X86SchedWriteWidths sched,
2724                                     Predicate prd>{
2725  let Predicates = [prd] in {
2726    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2727                                      _.info512, "z">, EVEX_V512;
2728  }
2729  let Predicates = [prd, HasVLX] in {
2730    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2731                                      _.info128, "x">, EVEX_V128;
2732    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2733                                      _.info256, "y">, EVEX_V256;
2734  }
2735}
2737
// Instantiates every fpclass flavour for one mnemonic:
//   PS  - f32 vectors (opcVec), 32-bit CD8 scaling.
//   PD  - f64 vectors (opcVec), 64-bit CD8 scaling, VEX_W.
//   SSZ - f32 scalar (opcScalar), VEX_LIG, CD8VT1 32-bit.
//   SDZ - f64 scalar (opcScalar), VEX_LIG, CD8VT1 64-bit, VEX_W.
// The scalar forms use sched.Scl; prd is forwarded to all four.
2737multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2738                                 bits<8> opcScalar, X86SchedWriteWidths sched,
2739                                 Predicate prd> {
2740  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2741                                      sched, prd>,
2742                                      EVEX_CD8<32, CD8VF>;
2743  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2744                                      sched, prd>,
2745                                      EVEX_CD8<64, CD8VF> , VEX_W;
2746  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2747                                   sched.Scl, f32x_info, prd>, VEX_LIG,
2748                                   EVEX_CD8<32, CD8VT1>;
2749  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2750                                   sched.Scl, f64x_info, prd>, VEX_LIG,
2751                                   EVEX_CD8<64, CD8VT1>, VEX_W;
2752}
2754
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, scheduled as
// SchedWriteFCmp and gated on HasDQI.
2754defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2755                                      HasDQI>, AVX512AIi8Base, EVEX;
2757
2758//-----------------------------------------------------------------
2759// Mask register copy, including
2760// - copy between mask registers
2761// - load/store mask registers
2762// - copy from GPR to mask register and vice versa
2763//
// Mask-register move family for one mask width:
//   kk - mask -> mask copy (opc_kk); no selection pattern, marked isMoveReg
//        with hasSideEffects = 0.
//   km - load from x86memop into a mask register (opc_km); selected for a
//        plain `load` producing vvt.
//   mk - store of a mask register to x86memop (opc_mk); selected for `store`.
// Note: kk receives WriteMove both through the enclosing `let SchedRW` and
// through its Sched<[WriteMove]> base class.
2763multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2764                         string OpcodeStr, RegisterClass KRC,
2765                         ValueType vvt, X86MemOperand x86memop> {
2766  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2767  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2768             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2769             Sched<[WriteMove]>;
2770  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2771             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2772             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2773             Sched<[WriteLoad]>;
2774  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2775             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2776             [(store KRC:$src, addr:$dst)]>,
2777             Sched<[WriteStore]>;
2778}
2780
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC):
//   kr - GPR  -> mask (opc_kr).
//   rk - mask -> GPR  (opc_rk).
// Both are defined with an empty pattern list and hasSideEffects = 0, and are
// scheduled as WriteMove.
2780multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2781                             string OpcodeStr,
2782                             RegisterClass KRC, RegisterClass GRC> {
2783  let hasSideEffects = 0 in {
2784    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2785               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2786               Sched<[WriteMove]>;
2787    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2788               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2789               Sched<[WriteMove]>;
2790  }
2791}
2793
// KMOV instantiations, one per mask width:
//   KMOVB - VK8  / i8mem,  GPR side GR32; requires HasDQI.
//   KMOVW - VK16 / i16mem, GPR side GR32; requires HasAVX512.
//   KMOVD - VK32 / i32mem, GPR side GR32; requires HasBWI.
//   KMOVQ - VK64 / i64mem, GPR side GR64; requires HasBWI.
// KMOVB and KMOVW share one prefix set between the mask/memory and GPR halves,
// so a single defm covers both. KMOVD and KMOVQ are each written as two defm
// statements because the two halves take different encoding modifiers (PD with
// VEX_W vs. XD for KMOVD; PS with VEX_W vs. XD with VEX_W for KMOVQ).
2793let Predicates = [HasDQI] in
2794  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2795               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2796               VEX, PD;
2797
2798let Predicates = [HasAVX512] in
2799  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2800               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2801               VEX, PS;
2802
2803let Predicates = [HasBWI] in {
2804  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2805               VEX, PD, VEX_W;
2806  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2807               VEX, XD;
2808  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2809               VEX, PS, VEX_W;
2810  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2811               VEX, XD, VEX_W;
2812}
2814
2814// GR from/to mask register
// These patterns carry no Predicates except where Requires<> is written.
// i16/i8 <-> v16i1/v8i1: the narrow GPR is first placed into (or extracted
// from) a 32-bit register via INSERT_SUBREG / EXTRACT_SUBREG, and the 32-bit
// value is moved with COPY_TO_REGCLASS.
2815def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2816          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2817def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2818          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2819def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2820          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2821
2822def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2823          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2824def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2825          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2826
// Extensions of a v16i1 mask viewed as i16: zext is selected to KMOVWrk
// (wrapped in SUBREG_TO_REG for i64); anyext only needs a register-class copy.
2827def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2828          (KMOVWrk VK16:$src)>;
2829def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2830          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2831def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2832          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2833def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2834          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2835
// Same for a v8i1 mask viewed as i8. The zext forms use KMOVBrk and therefore
// require HasDQI; the anyext forms have no such requirement.
2836def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2837          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2838def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2839          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2840def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2841          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2842def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2843          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2844
// 32- and 64-bit masks have a same-width GPR class, so a direct
// COPY_TO_REGCLASS is used in both directions.
2845def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2846          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2847def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2848          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2849def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2850          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2851def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2852          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2854
2855// Load/store kreg
    // With DQI, loads of the sub-byte mask types are selected as a KMOVB load
    // whose result is copied into the narrower mask register class.
2856let Predicates = [HasDQI] in {
2857  def : Pat<(v1i1 (load addr:$src)),
2858            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2859  def : Pat<(v2i1 (load addr:$src)),
2860            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2861  def : Pat<(v4i1 (load addr:$src)),
2862            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2863}
2864
    // With plain AVX512, a v8i1 bitconvert of an i8 load goes through a GPR
    // (MOVZX32rm8) and is then copied into VK8; a v16i1 bitconvert of a 16-bit
    // load is selected directly as KMOVWkm.
2865let Predicates = [HasAVX512] in {
2866  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2867            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2868  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2869            (KMOVWkm addr:$src)>;
2870}
2871
2872def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",  // typed view of EXTRACT_VECTOR_ELT for mask vectors
2873                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,        // result 0 is i8
2874                                              SDTCVecEltisVT<1, i1>,  // operand 1 is a vector with i1 elements
2875                                              SDTCisPtrTy<2>]>>;      // operand 2 (used below as the index) is pointer-typed
2876
2877let Predicates = [HasAVX512] in {
      // For one mask register class / mask type pair, emit the patterns that
      // move a scalar between a GPR and element 0 of the mask:
      //  - scalar_to_vector from GR32 or GR8 (GR8 is first widened into an
      //    IMPLICIT_DEF i32),
      //  - X86kextract of element 0 as i8, and its anyext to i32.
      // Every case is a register-class copy through GR32.
2878  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2879    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2880              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2881
2882    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2883              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2884
2885    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2886              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2887
2888    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2889              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2890  }
2891
      // Instantiate for every mask width from 1 to 64 bits.
2892  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2893  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2894  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2895  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2896  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2897  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2898  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2899
      // Inserting a single bit into an all-zeros v16i1: the GR8 source is
      // widened to i32, ANDed with 1 so only bit 0 survives, then moved into
      // the mask register with KMOVWkr.
2900  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2901                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2902            (KMOVWkr (AND32ri8
2903                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2904                      (i32 1)))>;
2905}
2906
2907// Mask unary operation
2908// - KNOT
    // Defines the register-to-register form ("rr") of a one-operand mask
    // instruction on register class KRC, selected for (OpNode KRC:$src) and
    // guarded by the single predicate 'prd'.
2909multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2910                            RegisterClass KRC, SDPatternOperator OpNode,
2911                            X86FoldableSchedWrite sched, Predicate prd> {
2912  let Predicates = [prd] in
2913    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2914               OpcodeStr#"\t{$src, $dst|$dst, $src}",
2915               [(set KRC:$dst, (OpNode KRC:$src))]>,
2916               Sched<[sched]>;
2917}
2918
2919multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2920                                SDPatternOperator OpNode,
2921                                X86FoldableSchedWrite sched> {
      // One variant per mask width; the mnemonic suffix (b/w/d/q) is pasted
      // onto OpcodeStr. B needs DQI, W needs AVX512, D and Q need BWI.
2922  defm B : avx512_mask_unop<opc, OpcodeStr#"b", VK8, OpNode,
2923                            sched, HasDQI>, VEX, PD;
2924  defm W : avx512_mask_unop<opc, OpcodeStr#"w", VK16, OpNode,
2925                            sched, HasAVX512>, VEX, PS;
2926  defm D : avx512_mask_unop<opc, OpcodeStr#"d", VK32, OpNode,
2927                            sched, HasBWI>, VEX, PD, VEX_W;
2928  defm Q : avx512_mask_unop<opc, OpcodeStr#"q", VK64, OpNode,
2929                            sched, HasBWI>, VEX, PS, VEX_W;
2930}
2931
2932// TODO - do we need a X86SchedWriteWidths::KMASK type?
    // KNOT{B,W,D,Q}: opcode 0x44, selected for 'vnot' on the matching mask class.
2933defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2934
2935// KNL does not support the byte-sized mask forms (KNOTB is gated on HasDQI),
    // so without DQI an 8-bit mask is promoted to 16-bit and inverted with KNOTW.
    // Note: this brace-less 'let' applies to the VK8 pattern only.
2936let Predicates = [HasAVX512, NoDQI] in
2937def : Pat<(vnot VK8:$src),
2938          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2939
    // Masks narrower than 8 bits have no instruction of their own: widen to
    // VK16, invert with KNOTW, and copy the result back to the *same* narrow
    // class as the source (VK1 for v1i1, matching the VK1 case in
    // avx512_binop_pat).
2940def : Pat<(vnot VK4:$src),
2941          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2942def : Pat<(vnot VK2:$src),
2943          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2944def : Pat<(vnot VK1:$src),
2945          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2946
2947// Mask binary operation
2948// - KAND, KANDN, KOR, KXNOR, KXOR
    // Defines the register-register form ("rr") of a two-operand mask
    // instruction on KRC, selected for (OpNode $src1, $src2). 'prd' is the
    // only predicate and IsCommutable is forwarded to isCommutable.
2949multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2950                           RegisterClass KRC, SDPatternOperator OpNode,
2951                           X86FoldableSchedWrite sched, Predicate prd,
2952                           bit IsCommutable> {
2953  let Predicates = [prd], isCommutable = IsCommutable in
2954    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2955               OpcodeStr#
2956                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2957               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2958               Sched<[sched]>;
2959}
2960
2961multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2962                                 SDPatternOperator OpNode,
2963                                 X86FoldableSchedWrite sched, bit IsCommutable,
2964                                 Predicate prdW = HasAVX512> {
      // One variant per mask width, with the b/w/d/q suffix pasted onto
      // OpcodeStr. B needs DQI, D and Q need BWI; the W predicate is the
      // caller-overridable prdW (HasAVX512 unless specified).
2965  defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8, OpNode,
2966                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2967  defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode,
2968                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2969  defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode,
2970                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2971  defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode,
2972                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2973}
2974
2975// These nodes use 'vnot' instead of 'not' to support vectors.
    // vandn(i0, i1) = and(vnot(i0), i1);  vxnor(i0, i1) = vnot(xor(i0, i1)).
2976def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2977def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2978
2979// TODO - do we need a X86SchedWriteWidths::KMASK type?
    // The last positional bit is IsCommutable: every operation is commutable
    // except KANDN. KADD additionally overrides the W-form predicate (prdW)
    // with HasDQI instead of the default HasAVX512.
2980defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
2981defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2982defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2983defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2984defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
2985defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
2986
2987multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2988                            Instruction Inst> {
      // Fallback patterns that implement a binary mask operation on narrow
      // mask types with a wider instruction 'Inst': both operands are copied
      // to VK16, Inst is applied, and the result is copied back to the
      // original narrow class.
2989  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2990  // for the DQI set, this type is legal and KxxxB instruction is used
      // (this brace-less 'let' covers only the VK8 pattern that follows).
2991  let Predicates = [NoDQI] in
2992  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2993            (COPY_TO_REGCLASS
2994              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2995                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2996
2997  // All types smaller than 8 bits require conversion anyway
2998  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2999        (COPY_TO_REGCLASS (Inst
3000                           (COPY_TO_REGCLASS VK1:$src1, VK16),
3001                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3002  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3003        (COPY_TO_REGCLASS (Inst
3004                           (COPY_TO_REGCLASS VK2:$src1, VK16),
3005                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3006  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3007        (COPY_TO_REGCLASS (Inst
3008                           (COPY_TO_REGCLASS VK4:$src1, VK16),
3009                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3010}
3011
    // Each logical operation falls back to its 16-bit (W) instruction.
3012defm : avx512_binop_pat<and,   KANDWrr>;
3013defm : avx512_binop_pat<vandn, KANDNWrr>;
3014defm : avx512_binop_pat<or,    KORWrr>;
3015defm : avx512_binop_pat<vxnor, KXNORWrr>;
3016defm : avx512_binop_pat<xor,   KXORWrr>;
3017
3018// Mask unpacking
    // Defines "kunpck<Suffix>" taking two Src-width masks and producing one
    // Dst-width mask, plus the concat_vectors pattern that selects it.
3019multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3020                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3021                             Predicate prd> {
3022  let Predicates = [prd] in {
        // The instruction itself carries no pattern; selection is done by the
        // separate Pat below.
3023    let hasSideEffects = 0 in
3024    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3025               (ins Src.KRC:$src1, Src.KRC:$src2),
3026               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3027               VEX_4V, VEX_L, Sched<[sched]>;
3028
        // Note the operand swap: concat_vectors($src1, $src2) is emitted as
        // rr($src2, $src1).
3029    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3030              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3031  }
3032}
3033
    // byte->word needs only AVX512; word->dword and dword->qword need BWI.
3034defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3035defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3036defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3037
3038// Mask bit testing
    // Defines the "rr" form of a two-operand mask test on KRC. It has no
    // register outputs: the result of (OpNode $src1, $src2) is written to
    // EFLAGS, which is listed in Defs.
3039multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3040                              SDNode OpNode, X86FoldableSchedWrite sched,
3041                              Predicate prd> {
3042  let Predicates = [prd], Defs = [EFLAGS] in
3043    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3044               OpcodeStr#"\t{$src2, $src1|$src1, $src2}",
3045               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3046               Sched<[sched]>;
3047}
3048
3049multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3050                                X86FoldableSchedWrite sched,
3051                                Predicate prdW = HasAVX512> {
      // One test instruction per mask width. B needs DQI, Q and D need BWI;
      // the W predicate is prdW (HasAVX512 unless the caller overrides it).
3052  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3053                                                                VEX, PD;
3054  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3055                                                                VEX, PS;
3056  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3057                                                                VEX, PS, VEX_W;
3058  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3059                                                                VEX, PD, VEX_W;
3060}
3061
3062// TODO - do we need a X86SchedWriteWidths::KMASK type?
    // KORTESTW uses the default HasAVX512 predicate; KTESTW is overridden to
    // require HasDQI.
3063defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3064defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3065
3066// Mask shift
    // Defines the register+immediate form ("ri") of a mask shift on KRC: the
    // shift count is an 8-bit target immediate and the instruction is selected
    // for (OpNode KRC:$src, timm). Predicated on HasAVX512 here.
3067multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3068                               SDNode OpNode, X86FoldableSchedWrite sched> {
3069  let Predicates = [HasAVX512] in
3070    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3071                 OpcodeStr#
3072                   "\t{$imm, $src, $dst|$dst, $src, $imm}",
3073                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3074                 Sched<[sched]>;
3075}
3076
3077multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3078                                 SDNode OpNode, X86FoldableSchedWrite sched> {
      // W and B share opc1 (W additionally sets VEX_W); Q and D share opc2
      // (Q additionally sets VEX_W). B is wrapped in HasDQI, Q and D in HasBWI.
3079  defm W : avx512_mask_shiftop<opc1, OpcodeStr#"w", VK16, OpNode,
3080                               sched>, VEX, TAPD, VEX_W;
3081  let Predicates = [HasDQI] in
3082  defm B : avx512_mask_shiftop<opc1, OpcodeStr#"b", VK8, OpNode,
3083                               sched>, VEX, TAPD;
3084  let Predicates = [HasBWI] in {
3085  defm Q : avx512_mask_shiftop<opc2, OpcodeStr#"q", VK64, OpNode,
3086                               sched>, VEX, TAPD, VEX_W;
3087  defm D : avx512_mask_shiftop<opc2, OpcodeStr#"d", VK32, OpNode,
3088                               sched>, VEX, TAPD;
3089  }
3090}
3091
    // Left and right mask shifts for every mask width.
3092defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3093defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3094
3095// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
    // Each Narrow operand is placed into an IMPLICIT_DEF Wide register with
    // INSERT_SUBREG, the Wide compare is issued, and the resulting mask is
    // copied into Narrow.KRC. The condition code captured as $cc is converted
    // to the instruction immediate by X86pcmpm_imm.
3096multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3097                                                 string InstStr,
3098                                                 X86VectorVTInfo Narrow,
3099                                                 X86VectorVTInfo Wide> {
    // Unmasked compare -> InstStr#"Zrri".
3100def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3101                                (Narrow.VT Narrow.RC:$src2), cond)),
3102          (COPY_TO_REGCLASS
3103           (!cast<Instruction>(InstStr#"Zrri")
3104            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3105            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3106            (X86pcmpm_imm $cc)), Narrow.KRC)>;
3107
    // (and mask, compare) -> masked form InstStr#"Zrrik", with the mask copied
    // to Wide.KRC. Frag_su is presumably the single-use variant of Frag --
    // confirm at its definition.
3108def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3109                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3110                                                    (Narrow.VT Narrow.RC:$src2),
3111                                                    cond)))),
3112          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3113           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3114           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3115           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3116           (X86pcmpm_imm $cc)), Narrow.KRC)>;
3117}
3118
3119multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3120                                                     string InstStr,
3121                                                     X86VectorVTInfo Narrow,
3122                                                     X86VectorVTInfo Wide> {
    // Same widening scheme as the register-register lowering, but one compare
    // operand is a Narrow.BroadcastLdFrag load that is folded as the memory
    // operand of the Wide "Zrmib" / "Zrmibk" instruction.
3123// Broadcast load.
3124def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3125                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3126          (COPY_TO_REGCLASS
3127           (!cast<Instruction>(InstStr#"Zrmib")
3128            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3129            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3130
    // Masked variant of the pattern above.
3131def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3132                           (Narrow.KVT
3133                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3134                                         (Narrow.BroadcastLdFrag addr:$src2),
3135                                         cond)))),
3136          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3137           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3138           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3139           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3140
3141// Commuted with broadcast load.
    // The load is the first compare operand here; the instruction still takes
    // the register first, so the immediate comes from X86pcmpm_imm_commute.
3142def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3143                                (Narrow.VT Narrow.RC:$src1),
3144                                cond)),
3145          (COPY_TO_REGCLASS
3146           (!cast<Instruction>(InstStr#"Zrmib")
3147            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3148            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3149
    // Masked variant of the commuted pattern.
3150def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3151                           (Narrow.KVT
3152                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3153                                         (Narrow.VT Narrow.RC:$src1),
3154                                         cond)))),
3155          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3156           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3157           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3158           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3159}
3160
3161// Same as above, but for fp types which don't use PatFrags.
    // The compare node is X86cmpm (X86cmpm_su in the masked forms) and the
    // condition code is passed through as timm:$cc. Narrow operands are
    // widened with INSERT_SUBREG into IMPLICIT_DEF and the result mask is
    // copied to Narrow.KRC.
3162multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3163                                                X86VectorVTInfo Narrow,
3164                                                X86VectorVTInfo Wide> {
    // Register-register compare -> InstStr#"Zrri".
3165def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3166                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3167          (COPY_TO_REGCLASS
3168           (!cast<Instruction>(InstStr#"Zrri")
3169            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3170            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3171            timm:$cc), Narrow.KRC)>;
3172
    // Masked register-register compare -> InstStr#"Zrrik".
3173def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3174                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3175                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3176          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3177           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3178           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3179           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3180           timm:$cc), Narrow.KRC)>;
3181
3182// Broadcast load.
    // The fp instructions use the "Zrmbi" / "Zrmbik" suffix for this form.
3183def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3184                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3185          (COPY_TO_REGCLASS
3186           (!cast<Instruction>(InstStr#"Zrmbi")
3187            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3188            addr:$src2, timm:$cc), Narrow.KRC)>;
3189
3190def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3191                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3192                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3193          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3194           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3195           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3196           addr:$src2, timm:$cc), Narrow.KRC)>;
3197
3198// Commuted with broadcast load.
    // The load is the first compare operand; the immediate is rewritten with
    // X86cmpm_imm_commute so the register can stay the first instruction operand.
3199def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3200                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3201          (COPY_TO_REGCLASS
3202           (!cast<Instruction>(InstStr#"Zrmbi")
3203            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3204            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3205
3206def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3207                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3208                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3209          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3210           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3211           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3212           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3213}
3214
3215let Predicates = [HasAVX512, NoVLX] in {
      // Without VLX, 128/256-bit dword/qword integer compares (signed via
      // X86pcmpm, unsigned via X86pcmpum) and ps/pd compares are widened to
      // the 512-bit instruction with the same element type.
      // Register-register integer forms:
3216  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3217  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3218
3219  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3220  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3221
3222  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3223  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3224
3225  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3226  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3227
      // Broadcast-load integer forms:
3228  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3229  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3230
3231  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3232  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3233
3234  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3235  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3236
3237  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3238  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3239
      // Floating-point forms (register and broadcast-load patterns together):
3240  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3241  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3242  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3243  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3244}
3245
3246let Predicates = [HasBWI, NoVLX] in {
      // Byte and word element compares: with BWI but no VLX, the 128/256-bit
      // forms are widened to the 512-bit VPCMP[U]B / VPCMP[U]W instructions.
      // Only the register-register lowering is instantiated here.
3247  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3248  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3249
3250  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3251  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3252
3253  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3254  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3255
3256  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3257  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3258}
3259
3260// Mask setting all 0s or 1s
    // Defines a pseudo instruction (no operands, empty asm string) that
    // produces the constant 'Val' of type VT in a KRC register. It is marked
    // rematerializable and as cheap as a move.
3261multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3262  let Predicates = [HasAVX512] in
3263    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3264        SchedRW = [WriteZero] in
3265      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3266                     [(set KRC:$dst, (VT Val))]>;
3267}
3268
3269multiclass avx512_mask_setop_w<SDPatternOperator Val> {
      // Only 16-, 32- and 64-bit pseudos are defined; narrower masks reuse the
      // W form through the patterns that follow this multiclass.
3270  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3271  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3272  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3273}
3274
    // KSET0{W,D,Q}: all-zeros mask.  KSET1{W,D,Q}: all-ones mask.
3275defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3276defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3277
3278// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
    // All-zeros / all-ones constants of the 1-, 2-, 4- and 8-bit mask types are
    // materialized with the 16-bit pseudo (KSET0W / KSET1W) and then copied
    // into the narrower mask register class.
3279let Predicates = [HasAVX512] in {
3280  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3281  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3282  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3283  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3284  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3285  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3286  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3287  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3288}
3289
3290// Patterns for kmask insert_subvector/extract_subvector to/from index=0
    // Both directions are pure register-class copies: extracting the low
    // subVT from a VT mask copies RC -> subRC, and inserting a subVT at index
    // 0 of an undef VT copies subRC -> RC.
3291multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3292                                             RegisterClass RC, ValueType VT> {
3293  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3294            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3295
3296  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3297            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3298}
    // Instantiated for every (narrower, wider) pair of mask types from v1i1
    // up to v64i1, grouped by the narrower type.
3299defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3300defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3301defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3302defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3303defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3304defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3305
3306defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3307defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3308defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3309defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3310defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3311
3312defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3313defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3314defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3315defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3316
3317defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3318defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3319defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3320
3321defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3322defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3323
3324defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3325
3326//===----------------------------------------------------------------------===//
3327// AVX-512 - Aligned and unaligned load and store
3328//
3329
3330multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3331                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3332                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3333                       bit NoRMPattern = 0,
3334                       SDPatternOperator SelectOprr = vselect> {
3335  let hasSideEffects = 0 in {
3336  let isMoveReg = 1 in
3337  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3338                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3339                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3340                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3341  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3342                      (ins _.KRCWM:$mask,  _.RC:$src),
3343                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3344                       "${dst} {${mask}} {z}, $src}"),
3345                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3346                                           (_.VT _.RC:$src),
3347                                           _.ImmAllZerosV)))], _.ExeDomain>,
3348                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3349
3350  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3351  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3352                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3353                    !if(NoRMPattern, [],
3354                        [(set _.RC:$dst,
3355                          (_.VT (ld_frag addr:$src)))]),
3356                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3357                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3358
3359  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3360    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3361                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3362                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3363                      "${dst} {${mask}}, $src1}"),
3364                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3365                                          (_.VT _.RC:$src1),
3366                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3367                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3368    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3369                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3370                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3371                      "${dst} {${mask}}, $src1}"),
3372                     [(set _.RC:$dst, (_.VT
3373                         (vselect_mask _.KRCWM:$mask,
3374                          (_.VT (ld_frag addr:$src1)),
3375                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3376                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3377  }
3378  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3379                  (ins _.KRCWM:$mask, _.MemOp:$src),
3380                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3381                                "${dst} {${mask}} {z}, $src}",
3382                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3383                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3384                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3385  }
3386  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3387            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3388
3389  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3390            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3391
3392  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3393            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3394             _.KRCWM:$mask, addr:$ptr)>;
3395}
3396
3397multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3398                                 AVX512VLVectorVTInfo _, Predicate prd,
3399                                 X86SchedWriteMoveLSWidths Sched,
3400                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
      // Aligned-load instantiation of avx512_load at all three vector widths,
      // using each width's AlignedLdFrag and masked_load_aligned. The 512-bit
      // form needs only 'prd' and passes an empty EVEX2VEX override; the
      // 256/128-bit forms additionally require HasVLX (the 256-bit override
      // name gets a "Y" appended).
3401  let Predicates = [prd] in
3402  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3403                       _.info512.AlignedLdFrag, masked_load_aligned,
3404                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3405
3406  let Predicates = [prd, HasVLX] in {
3407  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3408                          _.info256.AlignedLdFrag, masked_load_aligned,
3409                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3410  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3411                          _.info128.AlignedLdFrag, masked_load_aligned,
3412                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3413  }
3414}
3415
// Instantiates avx512_load for the 512-, 256- and 128-bit forms of a vector
// load using each width's LdFrag and masked_load (i.e. without the aligned
// fragments used by avx512_alignedload_vl).
//   prd          - predicate required by every width; the 256- and 128-bit
//                  forms additionally require HasVLX.
//   EVEX2VEXOvrd - override string: "" is passed for the 512-bit form,
//                  EVEX2VEXOvrd#"Y" for 256-bit, EVEX2VEXOvrd for 128-bit.
//   NoRMPattern  - forwarded unchanged to avx512_load.
//   SelectOprr   - forwarded unchanged to avx512_load; defaults to vselect,
//                  and some instantiations pass null_frag instead.
3416multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3417                          AVX512VLVectorVTInfo _, Predicate prd,
3418                          X86SchedWriteMoveLSWidths Sched,
3419                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
3420                          SDPatternOperator SelectOprr = vselect> {
  // No braces: this predicate list applies only to the Z defm that follows.
3421  let Predicates = [prd] in
3422  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3423                       masked_load, Sched.ZMM, "",
3424                       NoRMPattern, SelectOprr>, EVEX_V512;
3425
3426  let Predicates = [prd, HasVLX] in {
3427  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3428                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3429                         NoRMPattern, SelectOprr>, EVEX_V256;
3430  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3431                         masked_load, Sched.XMM, EVEX2VEXOvrd,
3432                         NoRMPattern, SelectOprr>, EVEX_V128;
3433  }
3434}
3435
// Defines the store-opcode forms of an AVX-512 vector move for one vector
// type `_`:
//   rr_REV / rrk_REV / rrkz_REV - register-to-register forms encoded as
//       MRMDestReg with `opc`; pattern-less, codegen-only, ForceDisassemble.
//       Each is tied via FoldGenData to BaseName#_.ZSuffix#{rr,rrk,rrkz}.
//   mr  - plain store; selects `st_frag` unless NoMRPattern is set, in which
//         case its pattern list is empty.
//   mrk - write-masked store; has no inline pattern and is selected through
//         the `mstore` Pat below.
// It also adds ".s"-suffixed assembler aliases for the three _REV forms.
// EVEX2VEXOvrd is used to build the EVEX2VEXOverride names of rr_REV and mr.
3436multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3437                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3438                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3439                        bit NoMRPattern = 0> {
3440  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // isMoveReg is set only on the unmasked rr_REV (brace-less let).
3441  let isMoveReg = 1 in
3442  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3443                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3444                         [], _.ExeDomain>, EVEX,
3445                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3446                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3447  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3448                         (ins _.KRCWM:$mask, _.RC:$src),
3449                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3450                         "${dst} {${mask}}, $src}",
3451                         [], _.ExeDomain>,  EVEX, EVEX_K,
3452                         FoldGenData<BaseName#_.ZSuffix#rrk>,
3453                         Sched<[Sched.RR]>;
3454  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3455                          (ins _.KRCWM:$mask, _.RC:$src),
3456                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3457                          "${dst} {${mask}} {z}, $src}",
3458                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3459                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
3460                          Sched<[Sched.RR]>;
3461  }
3462
  // Brace-less let: hasSideEffects/mayStore are set explicitly only on `mr`
  // (needed because its pattern list may be empty when NoMRPattern is set).
3463  let hasSideEffects = 0, mayStore = 1 in
3464  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3465                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3466                    !if(NoMRPattern, [],
3467                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3468                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3469                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked store; marked NotMemoryFoldable and selected via the Pat below.
3470  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3471                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3472              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3473               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3474               NotMemoryFoldable;
3475
  // Select the masked-store fragment onto the mrk instruction defined above.
3476  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3477           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3478                                                        _.KRCWM:$mask, _.RC:$src)>;
3479
  // "<mnemonic>.s" aliases that map onto the _REV register forms.
  // NOTE(review): the trailing 0 is presumably InstAlias's Emit argument
  // (alias accepted when parsing, not used for printing) - confirm in Target.td.
3480  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3481                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3482                   _.RC:$dst, _.RC:$src), 0>;
3483  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3484                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3485                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3486  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3487                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3488                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3489}
3490
// Instantiates avx512_store for the 512-, 256- and 128-bit forms of a vector
// store, using the generic `store` and `masked_store` fragments.
//   prd          - predicate required by every width; the 256- and 128-bit
//                  forms additionally require HasVLX.
//   EVEX2VEXOvrd - override string: "" is passed for the 512-bit form,
//                  EVEX2VEXOvrd#"Y" for 256-bit, EVEX2VEXOvrd for 128-bit.
//   NoMRPattern  - forwarded unchanged to avx512_store.
3491multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3492                            AVX512VLVectorVTInfo _, Predicate prd,
3493                            X86SchedWriteMoveLSWidths Sched,
3494                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  // No braces: this predicate list applies only to the Z defm that follows.
3495  let Predicates = [prd] in
3496  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3497                        masked_store, Sched.ZMM, "",
3498                        NoMRPattern>, EVEX_V512;
3499  let Predicates = [prd, HasVLX] in {
3500    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3501                             masked_store, Sched.YMM,
3502                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3503    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3504                             masked_store, Sched.XMM, EVEX2VEXOvrd,
3505                             NoMRPattern>, EVEX_V128;
3506  }
3507}
3508
// Instantiates avx512_store for the 512-, 256- and 128-bit forms of an
// aligned vector store, using the `alignedstore` and `masked_store_aligned`
// fragments.
//   prd          - predicate required by every width; the 256- and 128-bit
//                  forms additionally require HasVLX.
//   EVEX2VEXOvrd - override string: "" is passed for the 512-bit form,
//                  EVEX2VEXOvrd#"Y" for 256-bit, EVEX2VEXOvrd for 128-bit.
//   NoMRPattern  - forwarded unchanged to avx512_store.
3509multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3510                                  AVX512VLVectorVTInfo _, Predicate prd,
3511                                  X86SchedWriteMoveLSWidths Sched,
3512                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  // No braces: this predicate list applies only to the Z defm that follows.
3513  let Predicates = [prd] in
3514  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3515                        masked_store_aligned, Sched.ZMM, "",
3516                        NoMRPattern>, EVEX_V512;
3517
3518  let Predicates = [prd, HasVLX] in {
3519    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3520                             masked_store_aligned, Sched.YMM,
3521                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3522    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3523                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3524                             NoMRPattern>, EVEX_V128;
3525  }
3526}
3527
// Packed vector move instantiations. Each defm pairs a load multiclass (first
// opcode) with the matching store multiclass (second opcode) and then applies
// the shared prefix / VEX_W / EVEX_CD8 encoding classes to both.

// Aligned FP moves: load opcode 0x28, store opcode 0x29.
3528defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3529                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3530               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3531                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3532               PS, EVEX_CD8<32, CD8VF>;
3533
3534defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3535                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3536               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3537                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3538               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3539
// Unaligned FP moves: load opcode 0x10, store opcode 0x11. The loads pass
// null_frag as SelectOprr instead of the default vselect.
3540defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3541                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3542               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3543                               SchedWriteFMoveLS, "VMOVUPS">,
3544                               PS, EVEX_CD8<32, CD8VF>;
3545
3546defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3547                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3548               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3549                               SchedWriteFMoveLS, "VMOVUPD">,
3550               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3551
// Aligned integer moves: load opcode 0x6F, store opcode 0x7F. The 32-bit
// element variant sets NoRMPattern/NoMRPattern to 1; the 64-bit one does not.
3552defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3553                                       HasAVX512, SchedWriteVecMoveLS,
3554                                       "VMOVDQA", 1>,
3555                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3556                                        HasAVX512, SchedWriteVecMoveLS,
3557                                        "VMOVDQA", 1>,
3558                 PD, EVEX_CD8<32, CD8VF>;
3559
3560defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3561                                       HasAVX512, SchedWriteVecMoveLS,
3562                                       "VMOVDQA">,
3563                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3564                                        HasAVX512, SchedWriteVecMoveLS,
3565                                        "VMOVDQA">,
3566                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3567
// Unaligned integer moves: load opcode 0x6F, store opcode 0x7F. The byte and
// word variants are predicated on HasBWI; all but the 64-bit element variant
// set NoRMPattern/NoMRPattern to 1. The 32- and 64-bit loads pass null_frag
// as SelectOprr.
3568defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3569                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
3570                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3571                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3572                XD, EVEX_CD8<8, CD8VF>;
3573
3574defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3575                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3576                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3577                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3578                 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3579
3580defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3581                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3582                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3583                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3584                 XS, EVEX_CD8<32, CD8VF>;
3585
3586defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3587                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3588                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3589                                 SchedWriteVecMoveLS, "VMOVDQU">,
3590                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3591
3592// Special instructions to help with spilling when we don't have VLX. We need
3593// to load or store from a ZMM register instead. These are converted in
3594// expandPostRAPseudos.
// Load pseudos: 128-/256-bit destinations, aligned (VMOVAPS) and unaligned
// (VMOVUPS) flavours. They have no asm string and no selection pattern.
3595let isReMaterializable = 1, canFoldAsLoad = 1,
3596    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3597def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3598                            "", []>, Sched<[WriteFLoadX]>;
3599def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3600                            "", []>, Sched<[WriteFLoadY]>;
3601def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3602                            "", []>, Sched<[WriteFLoadX]>;
3603def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3604                            "", []>, Sched<[WriteFLoadY]>;
3605}
3606
// Store pseudos mirroring the load pseudos above.
3607let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3608def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3609                            "", []>, Sched<[WriteFStoreX]>;
3610def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3611                            "", []>, Sched<[WriteFStoreY]>;
3612def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3613                            "", []>, Sched<[WriteFStoreX]>;
3614def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3615                            "", []>, Sched<[WriteFStoreY]>;
3616}
3617
// 512-bit selects whose *true* operand is all-zeros: select them as a
// zero-masking register move of $src under the inverted mask.
// For v8i64 the mask is copied to VK16, inverted with KNOTWrr, and copied
// back to VK8 before being used by VMOVDQA64Zrrkz.
3618def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3619                          (v8i64 VR512:$src))),
3620   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3621                                              VK8), VR512:$src)>;
3622
// For v16i32 the mask is inverted directly with KNOTWrr.
3623def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3624                           (v16i32 VR512:$src))),
3625                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3626
3627// These patterns exist to prevent the above patterns from introducing a second
3628// mask inversion when one already exists.
// The un-inverted mask under the vnot is used directly as the write-mask.
3629def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3630                          (v8i64 immAllZerosV),
3631                          (v8i64 VR512:$src))),
3632                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): the source binds $mask as VK16 but the result names it
// VK16WM, unlike the v8i64 pattern above which uses VK8 on both sides -
// confirm whether this difference is intentional.
3633def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3634                           (v16i32 immAllZerosV),
3635                           (v16i32 VR512:$src))),
3636                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3637
// Lowers a masked select on a Narrow vector type by performing the masked
// move on the Wide type and extracting the low Narrow subregister.
//   InstrStr - name prefix of the wide move; "rrk" / "rrkz" is appended.
//   Narrow   - type info of the vector being selected.
//   Wide     - type info of the vector the operation is performed on.
// In both patterns the narrow operands are placed into an IMPLICIT_DEF wide
// register with INSERT_SUBREG and the mask is copied into Wide.KRCWM.
3638multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3639                              X86VectorVTInfo Wide> {
 // Merge-masking form: $src0 (the select's false value) is passed as the
 // first operand of the rrk instruction and $src1 as the moved source.
3640 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3641                               Narrow.RC:$src1, Narrow.RC:$src0)),
3642           (EXTRACT_SUBREG
3643            (Wide.VT
3644             (!cast<Instruction>(InstrStr#"rrk")
3645              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3646              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3647              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3648            Narrow.SubRegIdx)>;
3649
 // Zero-masking form: the false value is all-zeros, so the rrkz instruction
 // is used and only $src1 needs widening.
3650 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3651                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3652           (EXTRACT_SUBREG
3653            (Wide.VT
3654             (!cast<Instruction>(InstrStr#"rrkz")
3655              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3656              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3657            Narrow.SubRegIdx)>;
3658}
3659
3660// Patterns for handling masked selects of 128-bit and 256-bit vectors when
3661// VLX isn't available. Use a 512-bit operation and extract.
// 32-bit and 64-bit element types: each 128-bit and 256-bit narrow type is
// paired with the 512-bit type of the same element type.
3662let Predicates = [HasAVX512, NoVLX] in {
3663  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3664  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3665  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3666  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3667
3668  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3669  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3670  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3671  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3672}
3673
// Byte and word element types use VMOVDQU8Z / VMOVDQU16Z and are therefore
// guarded by HasBWI rather than HasAVX512.
3674let Predicates = [HasBWI, NoVLX] in {
3675  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3676  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3677
3678  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3679  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3680}
3681
// Unmasked 512-bit loads/stores of i32, i16 and i8 element vectors are all
// selected to the 64-bit-element VMOVDQA64Z (aligned) or VMOVDQU64Z
// (unaligned) instructions.
3682let Predicates = [HasAVX512] in {
3683  // 512-bit load.
3684  def : Pat<(alignedloadv16i32 addr:$src),
3685            (VMOVDQA64Zrm addr:$src)>;
3686  def : Pat<(alignedloadv32i16 addr:$src),
3687            (VMOVDQA64Zrm addr:$src)>;
3688  def : Pat<(alignedloadv64i8 addr:$src),
3689            (VMOVDQA64Zrm addr:$src)>;
3690  def : Pat<(loadv16i32 addr:$src),
3691            (VMOVDQU64Zrm addr:$src)>;
3692  def : Pat<(loadv32i16 addr:$src),
3693            (VMOVDQU64Zrm addr:$src)>;
3694  def : Pat<(loadv64i8 addr:$src),
3695            (VMOVDQU64Zrm addr:$src)>;
3696
3697  // 512-bit store.
3698  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3699            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3700  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3701            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3702  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3703            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3704  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3705            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3706  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3707            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3708  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3709            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3710}
3711
// With VLX, unmasked 128-bit and 256-bit loads/stores of i32, i16 and i8
// element vectors are all selected to the 64-bit-element VMOVDQA64Z128/Z256
// (aligned) or VMOVDQU64Z128/Z256 (unaligned) instructions.
3712let Predicates = [HasVLX] in {
3713  // 128-bit load.
3714  def : Pat<(alignedloadv4i32 addr:$src),
3715            (VMOVDQA64Z128rm addr:$src)>;
3716  def : Pat<(alignedloadv8i16 addr:$src),
3717            (VMOVDQA64Z128rm addr:$src)>;
3718  def : Pat<(alignedloadv16i8 addr:$src),
3719            (VMOVDQA64Z128rm addr:$src)>;
3720  def : Pat<(loadv4i32 addr:$src),
3721            (VMOVDQU64Z128rm addr:$src)>;
3722  def : Pat<(loadv8i16 addr:$src),
3723            (VMOVDQU64Z128rm addr:$src)>;
3724  def : Pat<(loadv16i8 addr:$src),
3725            (VMOVDQU64Z128rm addr:$src)>;
3726
3727  // 128-bit store.
3728  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3729            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3730  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3731            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3732  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3733            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3734  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3735            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3736  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3737            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3738  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3739            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3740
3741  // 256-bit load.
3742  def : Pat<(alignedloadv8i32 addr:$src),
3743            (VMOVDQA64Z256rm addr:$src)>;
3744  def : Pat<(alignedloadv16i16 addr:$src),
3745            (VMOVDQA64Z256rm addr:$src)>;
3746  def : Pat<(alignedloadv32i8 addr:$src),
3747            (VMOVDQA64Z256rm addr:$src)>;
3748  def : Pat<(loadv8i32 addr:$src),
3749            (VMOVDQU64Z256rm addr:$src)>;
3750  def : Pat<(loadv16i16 addr:$src),
3751            (VMOVDQU64Z256rm addr:$src)>;
3752  def : Pat<(loadv32i8 addr:$src),
3753            (VMOVDQU64Z256rm addr:$src)>;
3754
3755  // 256-bit store.
3756  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3757            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3758  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3759            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3760  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3761            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3762  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3763            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3764  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3765            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3766  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3767            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3768}
3769
3770// Move Int Doubleword to Packed Double Int
3771//
// This group also holds the quadword (vmovq, VEX_W) forms that use the same
// 0x6E opcode, plus the codegen-only GR64 <-> FR64X bitconvert moves.
3772let ExeDomain = SSEPackedInt in {
// GR32 / i32 memory -> low element of a v4i32 (scalar_to_vector).
3773def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3774                      "vmovd\t{$src, $dst|$dst, $src}",
3775                      [(set VR128X:$dst,
3776                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3777                        EVEX, Sched<[WriteVecMoveFromGpr]>;
3778def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3779                      "vmovd\t{$src, $dst|$dst, $src}",
3780                      [(set VR128X:$dst,
3781                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3782                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> low element of a v2i64.
3783def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3784                      "vmovq\t{$src, $dst|$dst, $src}",
3785                        [(set VR128X:$dst,
3786                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3787                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form of the above: no selection pattern, codegen-only but still
// disassembled (ForceDisassemble), so mayLoad is stated explicitly.
3788let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3789def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3790                      (ins i64mem:$src),
3791                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3792                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitconverts between GR64 and the scalar FR64X class.
3793let isCodeGenOnly = 1 in {
3794def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3795                       "vmovq\t{$src, $dst|$dst, $src}",
3796                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3797                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3798def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3799                         "vmovq\t{$src, $dst|$dst, $src}",
3800                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3801                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3802}
3803} // ExeDomain = SSEPackedInt
3804
3805// Move Int Doubleword to Single Scalar
3806//
// Codegen-only bitconvert from GR32 to the scalar FR32X class, using the
// same 0x6E vmovd opcode as the GR32 -> VR128X move.
3807let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3808def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3809                      "vmovd\t{$src, $dst|$dst, $src}",
3810                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3811                      EVEX, Sched<[WriteVecMoveFromGpr]>;
3812} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3813
3814// Move doubleword from xmm register to r/m32
3815//
// Both forms select extraction of element 0 of a v4i32: the rr form writes it
// to a GR32, the mr form stores it to memory.
3816let ExeDomain = SSEPackedInt in {
3817def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3818                       "vmovd\t{$src, $dst|$dst, $src}",
3819                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3820                                        (iPTR 0)))]>,
3821                       EVEX, Sched<[WriteVecMoveToGpr]>;
3822def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3823                       (ins i32mem:$dst, VR128X:$src),
3824                       "vmovd\t{$src, $dst|$dst, $src}",
3825                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3826                                     (iPTR 0))), addr:$dst)]>,
3827                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3828} // ExeDomain = SSEPackedInt
3829
3830// Move quadword from xmm1 register to r/m64
3831//
3832let ExeDomain = SSEPackedInt in {
// Element 0 of a v2i64 -> GR64, opcode 0x7E.
3833def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3834                      "vmovq\t{$src, $dst|$dst, $src}",
3835                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3836                                                   (iPTR 0)))]>,
3837                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3838                      Requires<[HasAVX512]>;
3839
// Memory form of the 0x7E encoding: pattern-less, codegen-only but still
// disassembled, and additionally restricted to 64-bit mode.
// NOTE(review): unlike VMOVPQI2QIZmr below, no EVEX_CD8 is attached here -
// confirm this is intended.
3840let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3841def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3842                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3843                      EVEX, VEX_W, Sched<[WriteVecStore]>,
3844                      Requires<[HasAVX512, In64BitMode]>;
3845
// Opcode 0xD6 store of element 0 of a v2i64; this is the form that carries
// the store selection pattern.
3846def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3847                      (ins i64mem:$dst, VR128X:$src),
3848                      "vmovq\t{$src, $dst|$dst, $src}",
3849                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3850                              addr:$dst)]>,
3851                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3852                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3853
// Register-to-register 0xD6 form: pattern-less, codegen-only but still
// disassembled.
3854let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3855def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3856                             (ins VR128X:$src),
3857                             "vmovq\t{$src, $dst|$dst, $src}", []>,
3858                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3859} // ExeDomain = SSEPackedInt
3860
// "vmovq.s" assembler alias that maps onto the 0xD6 register-to-register
// form VMOVPQI2QIZrr.
3861def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3862                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3863
// Select the X86vextractstore64 node on a v2i64 to the 0xD6 memory form.
3864let Predicates = [HasAVX512] in {
3865  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3866            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3867}
3868
3869// Move Scalar Single to Double Int
3870//
// Codegen-only bitconvert from the scalar FR32X class to GR32, using the
// 0x7E vmovd opcode.
3871let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3872def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3873                      (ins FR32X:$src),
3874                      "vmovd\t{$src, $dst|$dst, $src}",
3875                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3876                      EVEX, Sched<[WriteVecMoveToGpr]>;
3877} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3878
3879// Move Quadword Int to Packed Quadword Int
3880//
// Loads an i64 from memory into the low element of a v2i64
// (scalar_to_vector of loadi64), using the XS-prefixed 0x7E encoding.
// NOTE(review): the compressed-displacement info is written as
// EVEX_CD8<8, CD8VT8> rather than the EVEX_CD8<64, CD8VT1> used by the other
// 64-bit scalar memory forms in this file - presumably equivalent (8 bytes);
// confirm.
3881let ExeDomain = SSEPackedInt in {
3882def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3883                      (ins i64mem:$src),
3884                      "vmovq\t{$src, $dst|$dst, $src}",
3885                      [(set VR128X:$dst,
3886                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3887                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3888} // ExeDomain = SSEPackedInt
3889
3890// Allow "vmovd" but print "vmovq".
// The aliases cover both directions of the 64-bit GPR <-> XMM move:
// GR64 -> VR128X (VMOV64toPQIZrr) and VR128X -> GR64 (VMOVPQIto64Zrr).
3891def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3892                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3893def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3894                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3895
3896// Conversions between masks and scalar fp.
// Each conversion is done in two steps through a general-purpose register:
// FR32X/FR64X <-> GPR via the codegen-only vmovd/vmovq bitconvert moves, and
// GPR <-> mask register via KMOVD/KMOVQ.
3897def : Pat<(v32i1 (bitconvert FR32X:$src)),
3898          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3899def : Pat<(f32 (bitconvert VK32:$src)),
3900          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3901
3902def : Pat<(v64i1 (bitconvert FR64X:$src)),
3903          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3904def : Pat<(f64 (bitconvert VK64:$src)),
3905          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3906
3907//===----------------------------------------------------------------------===//
3908// AVX-512  MOVSS, MOVSD
3909//===----------------------------------------------------------------------===//
3910
// Defines the EVEX scalar move family for one scalar FP type `_`:
//   rr / rrkz / rrk - register forms (opcode 0x10) selecting OpNode, plain,
//                     zero-masked and merge-masked via X86selects.
//   rm / rm_alt     - loads; rm selects vzload_frag into _.RC, rm_alt is a
//                     codegen-only variant selecting _.ScalarLdFrag into _.FRC.
//   rmk / rmkz      - masked loads with no selection pattern.
//   mr              - store of _.FRC (opcode 0x11).
//   mrk             - masked store with no selection pattern.
//   asm         - mnemonic.
//   OpNode      - SDNode matched by the register forms.
//   vzload_frag - fragment matched by the rm load.
3911multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3912                              X86VectorVTInfo _> {
  // Brace-less let: only `rr` is restricted to [HasAVX512, OptForSize]; the
  // defs that follow are not covered by this predicate list.
3913  let Predicates = [HasAVX512, OptForSize] in
3914  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3915             (ins _.RC:$src1, _.RC:$src2),
3916             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3917             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3918             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masking: the select's false value is all-zeros.
3919  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3920              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3921              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3922              "$dst {${mask}} {z}, $src1, $src2}"),
3923              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3924                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3925                                      _.ImmAllZerosV)))],
3926              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masking: $src0 is the pass-through value and is tied to $dst.
3927  let Constraints = "$src0 = $dst"  in
3928  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3929             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3930             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3931             "$dst {${mask}}, $src1, $src2}"),
3932             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3933                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3934                                     (_.VT _.RC:$src0))))],
3935             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3936  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3937  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3938             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3939             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3940             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3941  // _alt version uses FR32/FR64 register class.
3942  let isCodeGenOnly = 1 in
3943  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3944                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3945                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3946                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3947  }
  // Masked loads have empty pattern lists, so mayLoad is stated explicitly.
3948  let mayLoad = 1, hasSideEffects = 0 in {
3949    let Constraints = "$src0 = $dst" in
3950    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3951               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3952               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3953               "$dst {${mask}}, $src}"),
3954               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3955    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3956               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3957               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3958               "$dst {${mask}} {z}, $src}"),
3959               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3960  }
3961  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3962             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3963             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3964             EVEX, Sched<[WriteFStore]>;
  // Masked store: pattern-less, takes a VK1WM mask and an _.RC (not _.FRC)
  // source, and is marked NotMemoryFoldable.
3965  let mayStore = 1, hasSideEffects = 0 in
3966  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3967              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3968              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3969              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3970              NotMemoryFoldable;
3971}
3972
// Single-precision instantiation: X86Movss node, 32-bit zero-extending load
// fragment, XS prefix.
3973defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3974                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3975
// Double-precision instantiation: X86Movsd node, 64-bit zero-extending load
// fragment, XD prefix with VEX_W.
3976defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3977                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3978
3979
// Folds a scalar X86selects feeding OpNode into a masked register move.
//   InstrStr - name prefix of the target instruction; rrk / rrkz is appended.
//   OpNode   - the scalar move node being matched.
//   ZeroFP   - leaf matching the FP zero used as the select's false value.
//   _        - vector type info (uses VT, EltVT, RC and FRC).
// Matched shape:
//   OpNode($src0, scalar_to_vector(X86selects($mask, $src1, <false value>)))
3980multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3981                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
3982
// Merge form: the select's false value $src2 is copied into _.RC and passed
// as the first (pass-through) operand; $src0 and the copied $src1 follow the
// mask as the two vector sources.
3983def : Pat<(_.VT (OpNode _.RC:$src0,
3984                        (_.VT (scalar_to_vector
3985                                  (_.EltVT (X86selects VK1WM:$mask,
3986                                                       (_.EltVT _.FRC:$src1),
3987                                                       (_.EltVT _.FRC:$src2))))))),
3988          (!cast<Instruction>(InstrStr#rrk)
3989                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3990                        VK1WM:$mask,
3991                        (_.VT _.RC:$src0),
3992                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3993
// Zeroing form: the select's false value is ZeroFP, so the rrkz instruction
// is used and no pass-through operand is needed.
3994def : Pat<(_.VT (OpNode _.RC:$src0,
3995                        (_.VT (scalar_to_vector
3996                                  (_.EltVT (X86selects VK1WM:$mask,
3997                                                       (_.EltVT _.FRC:$src1),
3998                                                       (_.EltVT ZeroFP))))))),
3999          (!cast<Instruction>(InstrStr#rrkz)
4000                        VK1WM:$mask,
4001                        (_.VT _.RC:$src0),
4002                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4003}
4004
// Selects a 512-bit masked_store whose stored value is a 128-bit vector
// inserted at index 0 of an undef 512-bit vector, and whose mask matches the
// `Mask` dag, to the masked scalar store InstrStr#mrk.
//   InstrStr - name prefix of the target instruction; mrk is appended.
//   _        - provides the 512-bit and 128-bit type infos.
//   Mask     - dag matched as the store mask.
//   MaskRC   - register class of the $mask operand used in the result.
// NOTE(review): $mask is not bound in the visible source pattern, so it is
// presumably bound inside the `Mask` dag supplied by the caller - confirm.
4005multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4006                                        dag Mask, RegisterClass MaskRC> {
4007
4008def : Pat<(masked_store
4009             (_.info512.VT (insert_subvector undef,
4010                               (_.info128.VT _.info128.RC:$src),
4011                               (iPTR 0))), addr:$dst, Mask),
4012          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4013                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4014                      _.info128.RC:$src)>;
4015
4016}
4017
// Same source pattern as avx512_store_scalar_lowering, but for a $mask held in
// a register class that is inserted into an i32 at sub-register index `subreg`
// before being copied to VK1WM.
//   InstrStr - instruction name prefix; "mrk" is appended.
//   _        - type info bundle (info128 and info512 are used).
//   Mask     - dag describing how the vector mask is built from $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted.
4018multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4019                                               AVX512VLVectorVTInfo _,
4020                                               dag Mask, RegisterClass MaskRC,
4021                                               SubRegIndex subreg> {
4022
// The remaining bits of the i32 come from IMPLICIT_DEF, i.e. they are left
// undefined by this pattern.
4023def : Pat<(masked_store
4024             (_.info512.VT (insert_subvector undef,
4025                               (_.info128.VT _.info128.RC:$src),
4026                               (iPTR 0))), addr:$dst, Mask),
4027          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4028                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4029                      _.info128.RC:$src)>;
4030
4031}
4032
4033// This matches the more recent codegen from clang that avoids emitting a 512
4034// bit masked store directly. Codegen will widen 128-bit masked store to 512
4035// bits on AVX512F only targets.
// Mask512 is the mask dag matched for the widened 512-bit store and Mask128 the
// mask dag matched for the native 128-bit store; both patterns select
// InstrStr#mrk with $mask inserted into an i32 at `subreg` and copied to VK1WM.
4036multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4037                                               AVX512VLVectorVTInfo _,
4038                                               dag Mask512, dag Mask128,
4039                                               RegisterClass MaskRC,
4040                                               SubRegIndex subreg> {
4041
4042// AVX512F pattern.
4043def : Pat<(masked_store
4044             (_.info512.VT (insert_subvector undef,
4045                               (_.info128.VT _.info128.RC:$src),
4046                               (iPTR 0))), addr:$dst, Mask512),
4047          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4048                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4049                      _.info128.RC:$src)>;
4050
4051// AVX512VL pattern.
4052def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4053          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4054                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4055                      _.info128.RC:$src)>;
4056}
4057
// Selects the masked load forms of InstrStr for the low 128 bits extracted from
// a 512-bit masked_load.
//   InstrStr - instruction name prefix; "rmkz" or "rmk" is appended.
//   _        - type info bundle (info128 and info512 are used).
//   Mask     - dag describing how the vector mask is built from $mask.
//   MaskRC   - register class of $mask; it is copied directly to VK1WM.
4058multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4059                                       dag Mask, RegisterClass MaskRC> {
4060
// Pass-through is all zeros: use the zero-masking form (rmkz).
4061def : Pat<(_.info128.VT (extract_subvector
4062                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4063                                        _.info512.ImmAllZerosV)),
4064                           (iPTR 0))),
4065          (!cast<Instruction>(InstrStr#rmkz)
4066                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4067                      addr:$srcAddr)>;
4068
// Pass-through is X86vzmovl($src) inserted at index 0 of an undef 512-bit
// vector: use the merge-masking form (rmk) with $src as the first operand.
4069def : Pat<(_.info128.VT (extract_subvector
4070                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4071                      (_.info512.VT (insert_subvector undef,
4072                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4073                            (iPTR 0))))),
4074                (iPTR 0))),
4075          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4076                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4077                      addr:$srcAddr)>;
4078
4079}
4080
// Same source patterns as avx512_load_scalar_lowering, but $mask is first
// inserted into an i32 (built on IMPLICIT_DEF) at sub-register index `subreg`
// and that i32 is then copied to VK1WM.
//   InstrStr - instruction name prefix; "rmkz" or "rmk" is appended.
//   _        - type info bundle (info128 and info512 are used).
//   Mask     - dag describing how the vector mask is built from $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted.
4081multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4082                                              AVX512VLVectorVTInfo _,
4083                                              dag Mask, RegisterClass MaskRC,
4084                                              SubRegIndex subreg> {
4085
// Zero pass-through -> zero-masking form (rmkz).
4086def : Pat<(_.info128.VT (extract_subvector
4087                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4088                                        _.info512.ImmAllZerosV)),
4089                           (iPTR 0))),
4090          (!cast<Instruction>(InstrStr#rmkz)
4091                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4092                      addr:$srcAddr)>;
4093
// X86vzmovl($src) pass-through -> merge-masking form (rmk).
4094def : Pat<(_.info128.VT (extract_subvector
4095                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4096                      (_.info512.VT (insert_subvector undef,
4097                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4098                            (iPTR 0))))),
4099                (iPTR 0))),
4100          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4101                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4102                      addr:$srcAddr)>;
4103
4104}
4105
4106// This matches the more recent codegen from clang that avoids emitting a 512
4107// bit masked load directly. Codegen will widen 128-bit masked load to 512
4108// bits on AVX512F only targets.
// Mask512 is the mask dag matched for the widened 512-bit load and Mask128 the
// mask dag matched for the native 128-bit load. Each width gets a zero-masking
// (rmkz) and a merge-masking (rmk) pattern.
4109multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4110                                              AVX512VLVectorVTInfo _,
4111                                              dag Mask512, dag Mask128,
4112                                              RegisterClass MaskRC,
4113                                              SubRegIndex subreg> {
4114// AVX512F patterns.
4115def : Pat<(_.info128.VT (extract_subvector
4116                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4117                                        _.info512.ImmAllZerosV)),
4118                           (iPTR 0))),
4119          (!cast<Instruction>(InstrStr#rmkz)
4120                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4121                      addr:$srcAddr)>;
4122
4123def : Pat<(_.info128.VT (extract_subvector
4124                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4125                      (_.info512.VT (insert_subvector undef,
4126                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4127                            (iPTR 0))))),
4128                (iPTR 0))),
4129          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4130                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4131                      addr:$srcAddr)>;
4132
4133// AVX512VL patterns.
4134def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4135                         _.info128.ImmAllZerosV)),
4136          (!cast<Instruction>(InstrStr#rmkz)
4137                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4138                      addr:$srcAddr)>;
4139
4140def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4141                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4142          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4143                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4144                      addr:$srcAddr)>;
4145}
4146
// Instantiate the masked scalar-move lowering patterns for VMOVSSZ (v4f32,
// zero leaf fp32imm0) and VMOVSDZ (v2f64, zero leaf fp64imm0).
4147defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4148defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4149
// Instantiate the masked scalar-store lowering patterns. Every mask dag ANDs
// the GPR $mask with 1 before converting it to a vector mask.
// GR32 mask, truncated to i16 and bitconverted to v16i1.
4150defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4151                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
// GR16 mask bitconverted to v16i1; inserted at sub_16bit.
4152defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4153                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
// GR8 mask bitconverted to v8i1; inserted at sub_8bit.
4154defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4155                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4156
// subreg2 forms: the first dag is the 512-bit-store mask (the 128-bit mask
// inserted into an all-zeros v16i1), the second is the 128-bit-store mask
// (low v4i1 of the v8i1).
4157defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4158                   (v16i1 (insert_subvector
4159                           (v16i1 immAllZerosV),
4160                           (v4i1 (extract_subvector
4161                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4162                                  (iPTR 0))),
4163                           (iPTR 0))),
4164                   (v4i1 (extract_subvector
4165                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4166                          (iPTR 0))), GR8, sub_8bit>;
// f64 variant: the 512-bit mask is v8i1, extracted from the v16i1 that holds
// the low v2i1 of the GR8-derived v8i1.
4167defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4168                   (v8i1
4169                    (extract_subvector
4170                     (v16i1
4171                      (insert_subvector
4172                       (v16i1 immAllZerosV),
4173                       (v2i1 (extract_subvector
4174                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4175                              (iPTR 0))),
4176                       (iPTR 0))),
4177                     (iPTR 0))),
4178                   (v2i1 (extract_subvector
4179                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4180                          (iPTR 0))), GR8, sub_8bit>;
4181
// Instantiate the masked scalar-load lowering patterns. The mask dags mirror
// those used for the store lowering instantiations: $mask is ANDed with 1 and
// converted to the vector mask type expected by the masked_load.
// GR32 mask, truncated to i16 and bitconverted to v16i1.
4182defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4183                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
// GR16 mask bitconverted to v16i1; inserted at sub_16bit.
4184defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4185                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
// GR8 mask bitconverted to v8i1; inserted at sub_8bit.
4186defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4187                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4188
// subreg2 forms: first dag is the 512-bit-load mask, second the 128-bit-load
// mask.
4189defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4190                   (v16i1 (insert_subvector
4191                           (v16i1 immAllZerosV),
4192                           (v4i1 (extract_subvector
4193                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4194                                  (iPTR 0))),
4195                           (iPTR 0))),
4196                   (v4i1 (extract_subvector
4197                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4198                          (iPTR 0))), GR8, sub_8bit>;
// f64 variant: the 512-bit mask is the v8i1 extracted from a v16i1 built around
// the low v2i1 of the GR8-derived mask.
4199defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4200                   (v8i1
4201                    (extract_subvector
4202                     (v16i1
4203                      (insert_subvector
4204                       (v16i1 immAllZerosV),
4205                       (v2i1 (extract_subvector
4206                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4207                              (iPTR 0))),
4208                       (iPTR 0))),
4209                     (iPTR 0))),
4210                   (v2i1 (extract_subvector
4211                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4212                          (iPTR 0))), GR8, sub_8bit>;
4213
// Scalar X86selects lowering. Scalar FR32X/FR64X values are copied into VR128X
// to feed the masked VMOVSS/VMOVSD forms, and the result is copied back to the
// scalar register class.

// f32, register/register: merge-masking. $src2 (the "false" value) is the first
// operand; the remaining vector source operand is IMPLICIT_DEF.
4214def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4215          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4216           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4217           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4218           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4219
// f32, "false" value is fp32imm0: zero-masking form.
4220def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4221          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4222           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4223
// f32, "true" value is a load: fold it into the masked load forms.
4224def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4225          (COPY_TO_REGCLASS
4226           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4227                                                       VK1WM:$mask, addr:$src)),
4228           FR32X)>;
4229def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4230          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4231
// f64 equivalents of the four patterns above, using VMOVSDZ and v2f64.
4232def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4233          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4234           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4235           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4236           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4237
4238def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4239          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4240           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4241
4242def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4243          (COPY_TO_REGCLASS
4244           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4245                                                       VK1WM:$mask, addr:$src)),
4246           FR64X)>;
4247def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4248          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4249
4250
// X86selects on whole v4f32/v2f64 values. Merge-masking forms: $src2 is the
// first operand and $src1 is passed for both remaining vector source operands.
4251def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4252          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4253def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4254          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4255
// "False" value is the all-zeros vector: zero-masking forms.
4256def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4257          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4258def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4259          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4260
// "_REV" register-register forms of vmovss/vmovsd: opcode 0x11 with the
// MRMDestReg form. They carry no selection pattern ([]), are isCodeGenOnly with
// ForceDisassemble set, and each names its non-REV counterpart via FoldGenData.
4261let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // vmovss, unmasked.
4262  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4263                           (ins VR128X:$src1, VR128X:$src2),
4264                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4265                           []>, XS, EVEX_4V, VEX_LIG,
4266                           FoldGenData<"VMOVSSZrr">,
4267                           Sched<[SchedWriteFShuffle.XMM]>;
4268
  // vmovss, merge-masked: $src0 is tied to $dst.
4269  let Constraints = "$src0 = $dst" in
4270  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4271                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4272                                                   VR128X:$src1, VR128X:$src2),
4273                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4274                                        "$dst {${mask}}, $src1, $src2}",
4275                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4276                             FoldGenData<"VMOVSSZrrk">,
4277                             Sched<[SchedWriteFShuffle.XMM]>;
4278
  // vmovss, zero-masked ({z}).
4279  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4280                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4281                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4282                                    "$dst {${mask}} {z}, $src1, $src2}",
4283                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4284                         FoldGenData<"VMOVSSZrrkz">,
4285                         Sched<[SchedWriteFShuffle.XMM]>;
4286
  // vmovsd, unmasked (XD prefix, VEX_W).
4287  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4288                           (ins VR128X:$src1, VR128X:$src2),
4289                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4290                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4291                           FoldGenData<"VMOVSDZrr">,
4292                           Sched<[SchedWriteFShuffle.XMM]>;
4293
  // vmovsd, merge-masked: $src0 is tied to $dst.
4294  let Constraints = "$src0 = $dst" in
4295  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4296                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4297                                                   VR128X:$src1, VR128X:$src2),
4298                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4299                                        "$dst {${mask}}, $src1, $src2}",
4300                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4301                             VEX_W, FoldGenData<"VMOVSDZrrk">,
4302                             Sched<[SchedWriteFShuffle.XMM]>;
4303
  // vmovsd, zero-masked ({z}).
4304  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4305                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4306                                                          VR128X:$src2),
4307                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4308                                         "$dst {${mask}} {z}, $src1, $src2}",
4309                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4310                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
4311                              Sched<[SchedWriteFShuffle.XMM]>;
4312}
4313
// Assembler aliases mapping the ".s"-suffixed mnemonics (vmovss.s / vmovsd.s)
// to the _REV instructions, in unmasked, merge-masked and zero-masked forms.
// NOTE(review): the trailing 0 is presumably the InstAlias emit argument (alias
// is not used when printing) - confirm against the InstAlias definition.
4314def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4315                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4316def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4317                             "$dst {${mask}}, $src1, $src2}",
4318                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4319                                VR128X:$src1, VR128X:$src2), 0>;
4320def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4321                             "$dst {${mask}} {z}, $src1, $src2}",
4322                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4323                                 VR128X:$src1, VR128X:$src2), 0>;
4324def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4325                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4326def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4327                             "$dst {${mask}}, $src1, $src2}",
4328                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4329                                VR128X:$src1, VR128X:$src2), 0>;
4330def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4331                             "$dst {${mask}} {z}, $src1, $src2}",
4332                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4333                                 VR128X:$src1, VR128X:$src2), 0>;
4334
// When optimizing for size, lower X86vzmovl on 32-bit-element vectors to
// VMOVSSZrr with a zeroed register (AVX512_128_SET0) as the first operand.
4335let Predicates = [HasAVX512, OptForSize] in {
  // 128-bit types: operate on the register directly.
4336  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4337            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4338  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4339            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4340
4341  // Move low f32 and clear high bits.
  // 256-bit types: extract the xmm sub-register, do the 128-bit move, and
  // re-wrap the result with SUBREG_TO_REG.
4342  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4343            (SUBREG_TO_REG (i32 0),
4344             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4345              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4346  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4347            (SUBREG_TO_REG (i32 0),
4348             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4349              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4350
  // 512-bit types: same approach as the 256-bit patterns.
4351  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4352            (SUBREG_TO_REG (i32 0),
4353             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4354              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4355  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4356            (SUBREG_TO_REG (i32 0),
4357             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4358              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4359}
4360
4361// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4362// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl forms are handled in this block; each blends the
// xmm sub-register of $src against a zeroed register (V_SET0).
// NOTE(review): the blend immediates (1 for VBLENDPSrri, 3 for VPBLENDWrri)
// presumably both select the low 32 bits from $src - confirm.
4363let Predicates = [HasAVX512, OptForSpeed] in {
4364  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4365            (SUBREG_TO_REG (i32 0),
4366             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4367                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4368                          (i8 1))), sub_xmm)>;
4369  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4370            (SUBREG_TO_REG (i32 0),
4371             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4372                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4373                          (i8 3))), sub_xmm)>;
4374}
4375
// Scalar FP load patterns selecting VMOVSSZrm / VMOVSDZrm.
4376let Predicates = [HasAVX512] in {
  // scalar_to_vector of a scalar load becomes the plain load instruction.
4377  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4378            (VMOVSSZrm addr:$src)>;
4379  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4380            (VMOVSDZrm addr:$src)>;
4381
4382  // Represent the same patterns above but in the form they appear for
4383  // 256-bit types
  // The 128-bit load result is placed in the xmm sub-register via
  // SUBREG_TO_REG.
4384  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4385            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4386  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4387            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4388
4389  // Represent the same patterns above but in the form they appear for
4390  // 512-bit types
4391  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4392            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4393  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4394            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4395}
4396
// Register-to-register "vmovq" (opcode 0x7E, MRMSrcReg) in the integer domain.
// Its pattern selects X86vzmovl on v2i64.
4397let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4398def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4399                                (ins VR128X:$src),
4400                                "vmovq\t{$src, $dst|$dst, $src}",
4401                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4402                                                   (v2i64 VR128X:$src))))]>,
4403                                EVEX, VEX_W;
4404}
4405
// Integer / 64-bit-element X86vzmovl and X86vzload patterns, selecting the
// EVEX movd/movq instructions.
4406let Predicates = [HasAVX512] in {
  // Zero-extending moves from a GPR into a vector register.
4407  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4408            (VMOVDI2PDIZrr GR32:$src)>;
4409
4410  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4411            (VMOV64toPQIZrr GR64:$src)>;
4412
4413  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4414  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4415            (VMOVDI2PDIZrm addr:$src)>;
4416  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4417            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4418  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4419            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4420  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4421            (VMOVQI2PQIZrm addr:$src)>;
4422  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4423            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4424
4425  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4426  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4427            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4428  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4429            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4430
  // 256-bit X86vzmovl with 64-bit elements: run vmovq on the xmm sub-register
  // and re-wrap the result with SUBREG_TO_REG.
4431  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4432            (SUBREG_TO_REG (i32 0),
4433             (v2f64 (VMOVZPQILo2PQIZrr
4434                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4435             sub_xmm)>;
4436  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4437            (SUBREG_TO_REG (i32 0),
4438             (v2i64 (VMOVZPQILo2PQIZrr
4439                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4440             sub_xmm)>;
4441
  // 512-bit equivalents of the two patterns above.
4442  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4443            (SUBREG_TO_REG (i32 0),
4444             (v2f64 (VMOVZPQILo2PQIZrr
4445                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4446             sub_xmm)>;
4447  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4448            (SUBREG_TO_REG (i32 0),
4449             (v2i64 (VMOVZPQILo2PQIZrr
4450                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4451             sub_xmm)>;
4452}
4453
4454//===----------------------------------------------------------------------===//
4455// AVX-512 - Non-temporals
4456//===----------------------------------------------------------------------===//
4457
// 512-bit "vmovntdqa" load (opcode 0x2A, MRMSrcMem). It has no inline selection
// pattern ([]).
4458def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4459                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4460                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4461                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4462
// 256-bit and 128-bit "vmovntdqa" loads, available under HasVLX. Like the
// 512-bit form they carry no inline selection pattern.
4463let Predicates = [HasVLX] in {
4464  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4465                       (ins i256mem:$src),
4466                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4467                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4468                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4469
4470  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4471                      (ins i128mem:$src),
4472                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4473                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4474                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4475}
4476
// Defines one non-temporal store instruction ("mr": memory destination,
// register source) for a single vector type.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _         - vector type info (MemOp, RC, VT, ExeDomain, EltSize).
//   Sched     - move/load/store scheduling info; only Sched.MR is used.
//   st_frag   - store fragment matched by the instruction's pattern
//               (defaults to alignednontemporalstore).
4477multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4478                        X86SchedWriteMoveLS Sched,
4479                        PatFrag st_frag = alignednontemporalstore> {
  // NOTE(review): AddedComplexity = 400 presumably makes this pattern win over
  // ordinary store patterns - confirm.
4480  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4481  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4482                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4483                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4484                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4485}
4486
// Instantiates avx512_movnt for all three vector widths of VTInfo: Z (512-bit)
// under HasAVX512, and Z256 / Z128 under HasAVX512 + HasVLX.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   VTInfo    - bundle of 512/256/128-bit type infos.
//   Sched     - per-width scheduling info (ZMM / YMM / XMM).
4487multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4488                           AVX512VLVectorVTInfo VTInfo,
4489                           X86SchedWriteMoveLSWidths Sched> {
4490  let Predicates = [HasAVX512] in
4491    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4492
4493  let Predicates = [HasAVX512, HasVLX] in {
4494    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4495    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4496  }
4497}
4498
// Non-temporal store instructions: vmovntdq (i64 element info, opcode 0xE7),
// vmovntpd (f64, opcode 0x2B, VEX_W) and vmovntps (f32, opcode 0x2B).
4499defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4500                                SchedWriteVecMoveLSNT>, PD;
4501defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4502                                SchedWriteFMoveLSNT>, PD, VEX_W;
4503defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4504                                SchedWriteFMoveLSNT>, PS;
4505
// Extra 512-bit non-temporal patterns.
4506let Predicates = [HasAVX512], AddedComplexity = 400 in {
  // Stores of the remaining 512-bit integer types reuse VMOVNTDQZmr (the
  // instruction itself is instantiated with the i64 element info).
4507  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4508            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4509  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4510            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4511  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4512            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4513
  // Aligned non-temporal loads of every listed 512-bit type (FP and integer)
  // select VMOVNTDQAZrm.
4514  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4515            (VMOVNTDQAZrm addr:$src)>;
4516  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4517            (VMOVNTDQAZrm addr:$src)>;
4518  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4519            (VMOVNTDQAZrm addr:$src)>;
4520  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4521            (VMOVNTDQAZrm addr:$src)>;
4522  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4523            (VMOVNTDQAZrm addr:$src)>;
4524  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4525            (VMOVNTDQAZrm addr:$src)>;
4526}
4527
// 256-bit and 128-bit counterparts of the 512-bit non-temporal patterns,
// available under HasVLX.
4528let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores of the remaining integer types -> VMOVNTDQZ256mr.
4529  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4530            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4531  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4532            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4533  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4534            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4535
  // 256-bit loads of every listed type -> VMOVNTDQAZ256rm.
4536  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4537            (VMOVNTDQAZ256rm addr:$src)>;
4538  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4539            (VMOVNTDQAZ256rm addr:$src)>;
4540  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4541            (VMOVNTDQAZ256rm addr:$src)>;
4542  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4543            (VMOVNTDQAZ256rm addr:$src)>;
4544  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4545            (VMOVNTDQAZ256rm addr:$src)>;
4546  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4547            (VMOVNTDQAZ256rm addr:$src)>;
4548
  // 128-bit stores of the remaining integer types -> VMOVNTDQZ128mr.
4549  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4550            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4551  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4552            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4553  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4554            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4555
  // 128-bit loads of every listed type -> VMOVNTDQAZ128rm.
4556  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4557            (VMOVNTDQAZ128rm addr:$src)>;
4558  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4559            (VMOVNTDQAZ128rm addr:$src)>;
4560  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4561            (VMOVNTDQAZ128rm addr:$src)>;
4562  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4563            (VMOVNTDQAZ128rm addr:$src)>;
4564  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4565            (VMOVNTDQAZ128rm addr:$src)>;
4566  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4567            (VMOVNTDQAZ128rm addr:$src)>;
4568}
4569
4570//===----------------------------------------------------------------------===//
4571// AVX-512 - Integer arithmetic
4572//
// Two-operand integer operation for one vector type, defined through
// AVX512_maskable in register-register (rr) and register-memory (rm) forms.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SDNode matched by the patterns.
//   _            - vector type info (RC, VT, MemOp, LdFrag).
//   sched        - foldable scheduling write; rm uses sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - forwarded (twice) to the rr AVX512_maskable only.
4573multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4574                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4575                           bit IsCommutable = 0> {
4576  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4577                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4578                    "$src2, $src1", "$src1, $src2",
4579                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4580                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4581                    Sched<[sched]>;
4582
  // Memory form: the second operand is a full-vector load via _.LdFrag.
4583  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4584                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4585                  "$src2, $src1", "$src1, $src2",
4586                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4587                  AVX512BIBase, EVEX_4V,
4588                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4589}
4590
// Extends avx512_binop_rm (inherited, so rr and rm are also defined) with a
// broadcast-memory form (rmb). The second operand is a scalar memory operand
// loaded through _.BroadcastLdFrag; the instruction is marked EVEX_B and its
// asm operand string is suffixed with _.BroadcastStr.
// Parameters are the same as for avx512_binop_rm.
4591multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4592                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4593                            bit IsCommutable = 0> :
4594           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4595  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4596                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4597                  "${src2}"#_.BroadcastStr#", $src1",
4598                  "$src1, ${src2}"#_.BroadcastStr,
4599                  (_.VT (OpNode _.RC:$src1,
4600                                (_.BroadcastLdFrag addr:$src2)))>,
4601                  AVX512BIBase, EVEX_4V, EVEX_B,
4602                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4603}
4604
// Instantiates avx512_binop_rm (rr + rm, no broadcast form) at three vector
// widths taken from VTInfo:
//   Z    - info512, sched.ZMM, EVEX_V512, guarded by [prd].
//   Z256 - info256, sched.YMM, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - info128, sched.XMM, EVEX_V128, guarded by [prd, HasVLX].
4605multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4606                              AVX512VLVectorVTInfo VTInfo,
4607                              X86SchedWriteWidths sched, Predicate prd,
4608                              bit IsCommutable = 0> {
4609  let Predicates = [prd] in
4610    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4611                             IsCommutable>, EVEX_V512;
4612
4613  let Predicates = [prd, HasVLX] in {
4614    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4615                                sched.YMM, IsCommutable>, EVEX_V256;
4616    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4617                                sched.XMM, IsCommutable>, EVEX_V128;
4618  }
4619}
4620
// Same width fan-out as avx512_binop_rm_vl, but built on avx512_binop_rmb so
// every width also gets the broadcast-memory (rmb) form:
//   Z    - info512, sched.ZMM, EVEX_V512, guarded by [prd].
//   Z256 - info256, sched.YMM, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - info128, sched.XMM, EVEX_V128, guarded by [prd, HasVLX].
4621multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4622                               AVX512VLVectorVTInfo VTInfo,
4623                               X86SchedWriteWidths sched, Predicate prd,
4624                               bit IsCommutable = 0> {
4625  let Predicates = [prd] in
4626    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4627                             IsCommutable>, EVEX_V512;
4628
4629  let Predicates = [prd, HasVLX] in {
4630    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4631                                 sched.YMM, IsCommutable>, EVEX_V256;
4632    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4633                                 sched.XMM, IsCommutable>, EVEX_V128;
4634  }
4635}
4636
// 64-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i64_info (so the
// rr, rm and rmb forms exist), tagged VEX_W and EVEX_CD8<64, CD8VF>.
4637multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4638                                X86SchedWriteWidths sched, Predicate prd,
4639                                bit IsCommutable = 0> {
4640  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4641                                  sched, prd, IsCommutable>,
4642                                  VEX_W, EVEX_CD8<64, CD8VF>;
4643}
4644
// 32-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i32_info (rr, rm
// and rmb forms), tagged EVEX_CD8<32, CD8VF>. Unlike the _q wrapper, no VEX_W
// is applied here.
4645multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4646                                X86SchedWriteWidths sched, Predicate prd,
4647                                bit IsCommutable = 0> {
4648  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4649                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4650}
4651
// 16-bit-element wrapper: avx512_binop_rm_vl over avx512vl_i16_info, tagged
// EVEX_CD8<16, CD8VF> and VEX_WIG. It is built on the non-broadcast
// multiclass, so only the rr and rm forms are produced.
4652multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4653                                X86SchedWriteWidths sched, Predicate prd,
4654                                bit IsCommutable = 0> {
4655  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4656                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4657                                 VEX_WIG;
4658}
4659
// 8-bit-element wrapper: avx512_binop_rm_vl over avx512vl_i8_info, tagged
// EVEX_CD8<8, CD8VF> and VEX_WIG. It is built on the non-broadcast
// multiclass, so only the rr and rm forms are produced.
4660multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4661                                X86SchedWriteWidths sched, Predicate prd,
4662                                bit IsCommutable = 0> {
4663  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4664                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4665                                 VEX_WIG;
4666}
4667
// Defines the dword and qword variants of one operation. The Q variant uses
// opc_q and OpcodeStr with "q" appended; the D variant uses opc_d and
// OpcodeStr with "d" appended. sched, prd and IsCommutable are shared.
4668multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4669                                 SDNode OpNode, X86SchedWriteWidths sched,
4670                                 Predicate prd, bit IsCommutable = 0> {
4671  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4672                                   IsCommutable>;
4673
4674  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4675                                   IsCommutable>;
4676}
4677
// Defines the word and byte variants of one operation. The W variant uses
// opc_w and OpcodeStr with "w" appended; the B variant uses opc_b and
// OpcodeStr with "b" appended. sched, prd and IsCommutable are shared.
4678multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4679                                 SDNode OpNode, X86SchedWriteWidths sched,
4680                                 Predicate prd, bit IsCommutable = 0> {
4681  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4682                                   IsCommutable>;
4683
4684  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4685                                   IsCommutable>;
4686}
4687
// Defines all four element-size variants of one operation. Opcodes are given
// in b, w, d, q order. The D/Q variants are guarded by HasAVX512 and the B/W
// variants by HasBWI; there is no predicate parameter on this multiclass.
4688multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4689                                  bits<8> opc_d, bits<8> opc_q,
4690                                  string OpcodeStr, SDNode OpNode,
4691                                  X86SchedWriteWidths sched,
4692                                  bit IsCommutable = 0> {
4693  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4694                                    sched, HasAVX512, IsCommutable>,
4695              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4696                                    sched, HasBWI, IsCommutable>;
4697}
4698
// Two-source operation whose source and destination vector types may differ.
// Produces rr, rm and rmb forms through AVX512_maskable.
//   _Src   - X86VectorVTInfo of both source operands (RC, MemOp, LdFrag, VT).
//   _Dst   - X86VectorVTInfo of the result; also the info given to
//            AVX512_maskable.
//   _Brdct - X86VectorVTInfo describing the broadcast operand of the rmb form
//            (ScalarMemOp, BroadcastStr, BroadcastLdFrag, VT).
// Note the parameter order here is (opc, OpcodeStr, sched, OpNode, ...),
// unlike avx512_binop_rm where OpNode precedes sched.
4699multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4700                            X86FoldableSchedWrite sched,
4701                            SDNode OpNode,X86VectorVTInfo _Src,
4702                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4703                            bit IsCommutable = 0> {
4704  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4705                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4706                            "$src2, $src1","$src1, $src2",
4707                            (_Dst.VT (OpNode
4708                                         (_Src.VT _Src.RC:$src1),
4709                                         (_Src.VT _Src.RC:$src2))),
                            // IsCommutable is passed once here, whereas
                            // avx512_binop_rm and avx512_packs_rm pass it twice.
4710                            IsCommutable>,
4711                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
4712  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4713                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4714                        "$src2, $src1", "$src1, $src2",
4715                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4716                                      (_Src.LdFrag addr:$src2)))>,
4717                        AVX512BIBase, EVEX_4V,
4718                        Sched<[sched.Folded, sched.ReadAfterFold]>;
4719
  // Broadcast form: the load is typed as _Brdct.VT and then bitconverted
  // before being passed to OpNode as the second operand.
4720  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4721                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4722                    OpcodeStr,
4723                    "${src2}"#_Brdct.BroadcastStr#", $src1",
4724                     "$src1, ${src2}"#_Brdct.BroadcastStr,
4725                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4726                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4727                    AVX512BIBase, EVEX_4V, EVEX_B,
4728                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4729}
4730
// Instruction instantiations for packed integer add, subtract, saturating
// add/subtract, multiply and average. In every instantiation the final
// template argument is IsCommutable (1 for the add, multiply and average
// nodes, 0 for the subtract nodes). For the _vl_all form the four opcodes are
// given in b, w, d, q order; for the _vl_bw form they are given in b, w order.
4731defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4732                                    SchedWriteVecALU, 1>;
4733defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4734                                    SchedWriteVecALU, 0>;
4735defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4736                                    SchedWriteVecALU, HasBWI, 1>;
4737defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4738                                    SchedWriteVecALU, HasBWI, 0>;
4739defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4740                                     SchedWriteVecALU, HasBWI, 1>;
4741defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4742                                     SchedWriteVecALU, HasBWI, 0>;
// The three `mul` instantiations differ in element size, predicate and
// scheduling class: D uses HasAVX512 with SchedWritePMULLD, W uses HasBWI and
// Q uses HasDQI, both with SchedWriteVecIMul.
4743defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4744                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
4745defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4746                                    SchedWriteVecIMul, HasBWI, 1>;
4747defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4748                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
4749                                    NotEVEX2VEXConvertible;
4750defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4751                                    HasBWI, 1>;
4752defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4753                                     HasBWI, 1>;
4754defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4755                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
4756defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4757                                   SchedWriteVecALU, HasBWI, 1>;
// VPMULDQ/VPMULUDQ go through the qword wrapper, so they are defined over
// avx512vl_i64_info and get VEX_W plus a 64-bit broadcast form.
4758defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4759                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4760defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4761                                     SchedWriteVecIMul, HasAVX512, 1>;
4762
// Instantiates avx512_binop_rm2 at 512/256/128 bits with separate source and
// destination type families. The broadcast operand info is fixed to the
// 64-bit-element infos (v8i64_info, v4i64x_info, v2i64x_info) regardless of
// _SrcVTInfo/_DstVTInfo, and every width is tagged EVEX_CD8<64, CD8VF> and
// VEX_W. The 512-bit form is guarded by [prd]; the 256/128-bit forms by
// [HasVLX, prd].
4763multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4764                            X86SchedWriteWidths sched,
4765                            AVX512VLVectorVTInfo _SrcVTInfo,
4766                            AVX512VLVectorVTInfo _DstVTInfo,
4767                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4768  let Predicates = [prd] in
4769    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4770                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4771                                 v8i64_info, IsCommutable>,
4772                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4773  let Predicates = [HasVLX, prd] in {
4774    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4775                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4776                                      v4i64x_info, IsCommutable>,
4777                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4778    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4779                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4780                                      v2i64x_info, IsCommutable>,
4781                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4782  }
4783}
4784
// VPMULTISHIFTQB: source and destination are both the i8 vector family, the
// selected node is X86multishift, the guarding predicate is HasVBMI and the
// operation is declared non-commutable (IsCommutable = 0). Because it is built
// on avx512_binop_all, its broadcast form uses 64-bit elements.
4785defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4786                                avx512vl_i8_info, avx512vl_i8_info,
4787                                X86multishift, HasVBMI, 0>, T8PD;
4788
// Broadcast-memory (rmb) form for pack-style operations whose result type
// (_Dst) differs from the source type (_Src). The broadcast is described by
// _Src: the load is typed as _Src.VT via _Src.BroadcastLdFrag and wrapped in
// a bitconvert before reaching OpNode. The compressed-displacement tag is
// derived from _Src.EltSize. No rr/rm forms are defined here; see
// avx512_packs_rm for those.
4789multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4790                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4791                            X86FoldableSchedWrite sched> {
4792  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4793                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4794                    OpcodeStr,
4795                    "${src2}"#_Src.BroadcastStr#", $src1",
4796                     "$src1, ${src2}"#_Src.BroadcastStr,
4797                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4798                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4799                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4800                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4801}
4802
// Register-register (rr) and register-memory (rm) forms for operations whose
// result type (_Dst) differs from the source type (_Src). Both forms carry
// EVEX_CD8<_Src.EltSize, CD8VF>. Unlike avx512_binop_rm, no AVX512BIBase is
// attached here, so the opcode map/prefix class must be supplied by the
// instantiation site. IsCommutable is forwarded to the rr form only.
4803multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4804                            SDNode OpNode,X86VectorVTInfo _Src,
4805                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4806                            bit IsCommutable = 0> {
4807  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4808                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4809                            "$src2, $src1","$src1, $src2",
4810                            (_Dst.VT (OpNode
4811                                         (_Src.VT _Src.RC:$src1),
4812                                         (_Src.VT _Src.RC:$src2))),
4813                            IsCommutable, IsCommutable>,
4814                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4815  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4816                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4817                        "$src2, $src1", "$src1, $src2",
4818                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4819                                      (_Src.LdFrag addr:$src2)))>,
4820                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4821                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4822}
4823
// Pack from i32 source vectors to i16 destination vectors at all three
// widths. Each width combines avx512_packs_rm (rr, rm) with avx512_packs_rmb
// (rmb), scheduled with the matching SchedWriteShuffle width. The 512-bit
// form requires HasBWI; the 256/128-bit forms require HasBWI and HasVLX.
4824multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4825                                    SDNode OpNode> {
4826  let Predicates = [HasBWI] in
4827  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4828                                 v32i16_info, SchedWriteShuffle.ZMM>,
4829                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4830                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4831  let Predicates = [HasBWI, HasVLX] in {
4832    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4833                                     v16i16x_info, SchedWriteShuffle.YMM>,
4834                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4835                                      v16i16x_info, SchedWriteShuffle.YMM>,
4836                                      EVEX_V256;
4837    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4838                                     v8i16x_info, SchedWriteShuffle.XMM>,
4839                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4840                                      v8i16x_info, SchedWriteShuffle.XMM>,
4841                                      EVEX_V128;
4842  }
4843}
// Pack from i16 source vectors to i8 destination vectors at all three widths.
// Only avx512_packs_rm is used, so there are rr and rm forms but no broadcast
// form, and every width is tagged VEX_WIG. The 512-bit form requires HasBWI;
// the 256/128-bit forms require HasBWI and HasVLX.
4844multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4845                            SDNode OpNode> {
4846  let Predicates = [HasBWI] in
4847  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4848                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4849  let Predicates = [HasBWI, HasVLX] in {
4850    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4851                                     v32i8x_info, SchedWriteShuffle.YMM>,
4852                                     EVEX_V256, VEX_WIG;
4853    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4854                                     v16i8x_info, SchedWriteShuffle.XMM>,
4855                                     EVEX_V128, VEX_WIG;
4856  }
4857}
4858
// Multiply-add style operations with distinct source (_Src) and destination
// (_Dst) type families. Reuses avx512_packs_rm (rr, rm; no broadcast form)
// at all three widths, scheduled with the matching SchedWriteVecIMul width.
// The 512-bit form requires HasBWI; the 256/128-bit forms require HasBWI and
// HasVLX. IsCommutable is forwarded unchanged.
4859multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4860                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4861                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4862  let Predicates = [HasBWI] in
4863  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4864                                _Dst.info512, SchedWriteVecIMul.ZMM,
4865                                IsCommutable>, EVEX_V512;
4866  let Predicates = [HasBWI, HasVLX] in {
4867    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4868                                     _Dst.info256, SchedWriteVecIMul.YMM,
4869                                     IsCommutable>, EVEX_V256;
4870    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4871                                     _Dst.info128, SchedWriteVecIMul.XMM,
4872                                     IsCommutable>, EVEX_V128;
4873  }
4874}
4875
// Pack instantiations. The signed (X86Packss) and unsigned (X86Packus) packs
// exist for both i32->i16 and i16->i8. The base class is attached here rather
// than inside the multiclasses: VPACKUSDW uses AVX5128IBase while the other
// three use AVX512BIBase.
4876defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4877defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4878defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4879defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4880
// Multiply-add instantiations. VPMADDUBSW maps i8 sources to i16 results and
// leaves IsCommutable at its default of 0; VPMADDWD maps i16 sources to i32
// results and is marked commutable.
4881defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4882                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4883defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4884                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4885
// Packed integer min/max instantiations, grouped as signed max, unsigned max,
// signed min, unsigned min. Within each group the B and W variants are guarded
// by HasBWI and the D and Q variants by HasAVX512. All are commutable
// (final argument 1). Every Q variant is additionally tagged
// NotEVEX2VEXConvertible.
4886defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4887                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4888defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4889                                    SchedWriteVecALU, HasBWI, 1>;
4890defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4891                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4892defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4893                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4894                                    NotEVEX2VEXConvertible;
4895
4896defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4897                                    SchedWriteVecALU, HasBWI, 1>;
4898defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4899                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4900defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4901                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4902defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4903                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4904                                    NotEVEX2VEXConvertible;
4905
4906defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4907                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4908defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4909                                    SchedWriteVecALU, HasBWI, 1>;
4910defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4911                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4912defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4913                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4914                                    NotEVEX2VEXConvertible;
4915
4916defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4917                                    SchedWriteVecALU, HasBWI, 1>;
4918defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4919                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4920defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4921                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4922defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4923                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4924                                    NotEVEX2VEXConvertible;
4925
4926// PMULLQ: with DQI but without VLX, implement the 128/256-bit multiply using
// the 512-bit instruction. Each register source is placed in the low
// subregister of an otherwise undefined (IMPLICIT_DEF) v8i64 value, the
// 512-bit VPMULLQ is applied, and the low subregister of the result is
// extracted. For the broadcast-load variants the memory operand is passed
// straight to the rmb form.
4927let Predicates = [HasDQI, NoVLX] in {
  // 256-bit: widen through sub_ymm.
4928  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4929            (EXTRACT_SUBREG
4930                (VPMULLQZrr
4931                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4932                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4933             sub_ymm)>;
4934  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4935            (EXTRACT_SUBREG
4936                (VPMULLQZrmb
4937                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4938                    addr:$src2),
4939             sub_ymm)>;
4940
  // 128-bit: widen through sub_xmm.
4941  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4942            (EXTRACT_SUBREG
4943                (VPMULLQZrr
4944                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4945                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4946             sub_xmm)>;
4947  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4948            (EXTRACT_SUBREG
4949                (VPMULLQZrmb
4950                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4951                    addr:$src2),
4952             sub_xmm)>;
4953}
4954
// Selection patterns that implement a 128/256-bit 64-bit-element OpNode using
// a 512-bit instruction.
//   Instr  - name prefix of the 512-bit instruction; "rr" and "rmb" are
//            appended to pick the register and broadcast-memory forms.
//   OpNode - the SDNode being lowered.
// Register sources are inserted into the low subregister of an IMPLICIT_DEF
// v8i64 value and the low subregister of the result is extracted. No
// predicates are set here; the instantiation site is expected to provide them.
4955multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit register-register and register-broadcast forms (sub_ymm).
4956  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4957            (EXTRACT_SUBREG
4958                (!cast<Instruction>(Instr#"rr")
4959                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4960                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4961             sub_ymm)>;
4962  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4963            (EXTRACT_SUBREG
4964                (!cast<Instruction>(Instr#"rmb")
4965                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4966                    addr:$src2),
4967             sub_ymm)>;
4968
  // 128-bit register-register and register-broadcast forms (sub_xmm).
4969  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4970            (EXTRACT_SUBREG
4971                (!cast<Instruction>(Instr#"rr")
4972                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4973                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4974             sub_xmm)>;
4975  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4976            (EXTRACT_SUBREG
4977                (!cast<Instruction>(Instr#"rmb")
4978                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4979                    addr:$src2),
4980             sub_xmm)>;
4981}
4982
// Without VLX, lower the 128/256-bit 64-bit-element unsigned/signed max and
// min nodes through the corresponding 512-bit instructions.
4983let Predicates = [HasAVX512, NoVLX] in {
4984  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
4985  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
4986  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
4987  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
4988}
4989
4990//===----------------------------------------------------------------------===//
4991// AVX-512 - Logical Instructions
4992//===----------------------------------------------------------------------===//
4993
// Packed logical instructions. Each is instantiated only in dword and qword
// variants, and both variants are given the same opcode byte. AND, OR and XOR
// are marked commutable; VPANDN omits the argument and so keeps the default
// IsCommutable = 0.
4994defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
4995                                   SchedWriteVecLogic, HasAVX512, 1>;
4996defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
4997                                  SchedWriteVecLogic, HasAVX512, 1>;
4998defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
4999                                   SchedWriteVecLogic, HasAVX512, 1>;
5000defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5001                                    SchedWriteVecLogic, HasAVX512>;
5002
// The logical instructions are defined only for dword/qword element types, so
// 128- and 256-bit and/or/xor/andnp on byte and word vectors are selected to
// the qword (Q) instruction forms here. Register-register patterns come
// first, then the folded-load patterns, for each width.
5003let Predicates = [HasVLX] in {
  // 128-bit, register-register.
5004  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5005            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5006  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5007            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5008
5009  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5010            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5011  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5012            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5013
5014  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5015            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5016  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5017            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5018
5019  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5020            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5021  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5022            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5023
  // 128-bit, second operand loaded from memory.
5024  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5025            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5026  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5027            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5028
5029  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5030            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5031  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5032            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5033
5034  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5035            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5036  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5037            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5038
5039  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5040            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5041  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5042            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5043
  // 256-bit, register-register.
5044  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5045            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5046  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5047            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5048
5049  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5050            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5051  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5052            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5053
5054  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5055            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5056  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5057            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5058
5059  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5060            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5061  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5062            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5063
  // 256-bit, second operand loaded from memory.
5064  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5065            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5066  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5067            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5068
5069  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5070            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5071  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5072            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5073
5074  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5075            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5076  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5077            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5078
5079  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5080            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5081  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5082            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5083}
5084
// 512-bit counterpart of the byte/word logic patterns: and/or/xor/andnp on
// v64i8 and v32i16 are selected to the qword (Q) 512-bit instruction forms.
// Register-register patterns come first, then the folded-load patterns.
5085let Predicates = [HasAVX512] in {
  // Register-register.
5086  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5087            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5088  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5089            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5090
5091  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5092            (VPORQZrr VR512:$src1, VR512:$src2)>;
5093  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5094            (VPORQZrr VR512:$src1, VR512:$src2)>;
5095
5096  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5097            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5098  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5099            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5100
5101  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5102            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5103  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5104            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5105
  // Second operand loaded from memory.
5106  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5107            (VPANDQZrm VR512:$src1, addr:$src2)>;
5108  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5109            (VPANDQZrm VR512:$src1, addr:$src2)>;
5110
5111  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5112            (VPORQZrm VR512:$src1, addr:$src2)>;
5113  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5114            (VPORQZrm VR512:$src1, addr:$src2)>;
5115
5116  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5117            (VPXORQZrm VR512:$src1, addr:$src2)>;
5118  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5119            (VPXORQZrm VR512:$src1, addr:$src2)>;
5120
5121  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5122            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5123  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5124            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5125}
5126
5127// Patterns to catch vselect with different type than logic op.
// Patterns for a masked select whose value type (_) differs from the type the
// logic operation was performed in (IntInfo); the logic result reaches the
// select through a bitconvert.
//   InstrStr - instruction name prefix; the suffixes "rrk", "rrkz", "rmk" and
//              "rmkz" are appended to pick the masked forms.
//   OpNode   - the logic SDNode.
//   _        - X86VectorVTInfo of the select (VT, RC, KRCWM, ImmAllZerosV).
//   IntInfo  - X86VectorVTInfo of the logic operation.
// The suffixes are written as quoted string literals, matching
// avx512_min_max_lowering, so the resulting record name does not depend on
// how TableGen resolves a bare identifier on the right of '#'.
5128multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5129                                    X86VectorVTInfo _,
5130                                    X86VectorVTInfo IntInfo> {
5131  // Masked register-register logical operations.
  // Merge-masking: lanes not selected by $mask come from $src0.
5132  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5133                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5134                   _.RC:$src0)),
5135            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src0, _.KRCWM:$mask,
5136             _.RC:$src1, _.RC:$src2)>;
5137
  // Zero-masking: the false operand of the select is all zeros.
5138  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5139                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5140                   _.ImmAllZerosV)),
5141            (!cast<Instruction>(InstrStr#"rrkz") _.KRCWM:$mask, _.RC:$src1,
5142             _.RC:$src2)>;
5143
5144  // Masked register-memory logical operations.
5145  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5146                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5147                                            (load addr:$src2)))),
5148                   _.RC:$src0)),
5149            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src0, _.KRCWM:$mask,
5150             _.RC:$src1, addr:$src2)>;
5151  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5152                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5153                                            (load addr:$src2)))),
5154                   _.ImmAllZerosV)),
5155            (!cast<Instruction>(InstrStr#"rmkz") _.KRCWM:$mask, _.RC:$src1,
5156             addr:$src2)>;
5157}
5158
// Broadcast-memory counterpart of avx512_logical_lowering: matches a masked
// select (typed by _) over a bitconverted logic operation (typed by IntInfo)
// whose second operand is IntInfo.BroadcastLdFrag, and selects the "rmbk"
// (merge-masking) or "rmbkz" (zero-masking) instruction form.
// The suffixes are written as quoted string literals, matching
// avx512_min_max_lowering, so the resulting record name does not depend on
// how TableGen resolves a bare identifier on the right of '#'.
5159multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5160                                         X86VectorVTInfo _,
5161                                         X86VectorVTInfo IntInfo> {
5162  // Register-broadcast logical operations.
5163  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5164                   (bitconvert
5165                    (IntInfo.VT (OpNode _.RC:$src1,
5166                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5167                   _.RC:$src0)),
5168            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src0, _.KRCWM:$mask,
5169             _.RC:$src1, addr:$src2)>;
5170  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5171                   (bitconvert
5172                    (IntInfo.VT (OpNode _.RC:$src1,
5173                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5174                   _.ImmAllZerosV)),
5175            (!cast<Instruction>(InstrStr#"rmbkz") _.KRCWM:$mask,
5176             _.RC:$src1, addr:$src2)>;
5177}
5178
// Instantiates avx512_logical_lowering at all three vector widths, appending
// "Z128"/"Z256"/"Z" to InstrStr. The 128- and 256-bit patterns are guarded by
// HasVLX, the 512-bit patterns by HasAVX512.
//   SelectInfo - type family of the masked select.
//   IntInfo    - type family the logic operation was performed in.
5179multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5180                                         AVX512VLVectorVTInfo SelectInfo,
5181                                         AVX512VLVectorVTInfo IntInfo> {
5182let Predicates = [HasVLX] in {
5183  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5184                                 IntInfo.info128>;
5185  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5186                                 IntInfo.info256>;
5187}
5188let Predicates = [HasAVX512] in {
5189  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5190                                 IntInfo.info512>;
5191}
5192}
5193
// Broadcast counterpart of avx512_logical_lowering_sizes: instantiates
// avx512_logical_lowering_bcast for the 128/256-bit widths (under HasVLX)
// and the 512-bit width (under HasAVX512), appending "Z128" / "Z256" / "Z"
// to InstrStr respectively.
5194multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5195                                               AVX512VLVectorVTInfo SelectInfo,
5196                                               AVX512VLVectorVTInfo IntInfo> {
5197let Predicates = [HasVLX] in {
5198  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5199                                       SelectInfo.info128, IntInfo.info128>;
5200  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5201                                       SelectInfo.info256, IntInfo.info256>;
5202}
5203let Predicates = [HasAVX512] in {
5204  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5205                                       SelectInfo.info512, IntInfo.info512>;
5206}
5207}
5208
// Instantiates the masked-logic lowering patterns for every combination of
// select element type and logic-op element type listed below.
//   InstrStr - instruction name prefix without the element-size letter;
//              "Q" is appended for i64/f64 selects, "D" for i32/f32 selects.
//   OpNode   - the logic node being matched.
// The i64-select/i64-op and i32-select/i32-op combinations are not
// instantiated here.
// NOTE(review): presumably those same-type cases are matched by the
// instruction definitions themselves -- confirm against the VPAND/VPOR/...
// definitions elsewhere in the file.
5209multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5210  // i64 vselect with i32/i16/i8 logic op
5211  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5212                                       avx512vl_i32_info>;
5213  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5214                                       avx512vl_i16_info>;
5215  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5216                                       avx512vl_i8_info>;
5217
5218  // i32 vselect with i64/i16/i8 logic op
5219  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5220                                       avx512vl_i64_info>;
5221  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5222                                       avx512vl_i16_info>;
5223  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5224                                       avx512vl_i8_info>;
5225
5226  // f32 vselect with i64/i32/i16/i8 logic op
5227  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5228                                       avx512vl_i64_info>;
5229  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5230                                       avx512vl_i32_info>;
5231  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5232                                       avx512vl_i16_info>;
5233  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5234                                       avx512vl_i8_info>;
5235
5236  // f64 vselect with i64/i32/i16/i8 logic op
5237  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5238                                       avx512vl_i64_info>;
5239  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5240                                       avx512vl_i32_info>;
5241  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5242                                       avx512vl_i16_info>;
5243  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5244                                       avx512vl_i8_info>;
5245
      // Broadcast-load patterns are only emitted where the logic-op element
      // width equals the select element width: f32 select with i32 op, and
      // f64 select with i64 op.
5246  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5247                                             avx512vl_f32_info,
5248                                             avx512vl_i32_info>;
5249  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5250                                             avx512vl_f64_info,
5251                                             avx512vl_i64_info>;
5252}
5253
// Emit the masked-logic lowering patterns for the four logic operations,
// pairing each instruction name prefix with the node it is matched from.
5254defm : avx512_logical_lowering_types<"VPAND", and>;
5255defm : avx512_logical_lowering_types<"VPOR",  or>;
5256defm : avx512_logical_lowering_types<"VPXOR", xor>;
5257defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5258
5259//===----------------------------------------------------------------------===//
5260// AVX-512  FP arithmetic
5261//===----------------------------------------------------------------------===//
5262
// Scalar FP binary operation: defines four records.
//   rr_Int / rm_Int - maskable forms on the vector register class _.RC,
//                     matched from VecNode.
//   rr / rm         - isCodeGenOnly forms on the scalar register class
//                     _.FRC, matched from OpNode, guarded by HasAVX512.
// Parameters:
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic, used as given (no suffix is appended here).
//   _            - scalar type info.
//   OpNode       - node for the FRC forms.
//   VecNode      - node for the _Int forms.
//   sched        - scheduling info; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - applied to the FRC "rr" record only.
// Every record here is marked Uses = [MXCSR], mayRaiseFPException = 1.
5263multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5264                            SDPatternOperator OpNode, SDNode VecNode,
5265                            X86FoldableSchedWrite sched, bit IsCommutable> {
5266  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5267  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5268                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5269                           "$src2, $src1", "$src1, $src2",
5270                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5271                           Sched<[sched]>;
5272
5273  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5274                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5275                         "$src2, $src1", "$src1, $src2",
5276                         (_.VT (VecNode _.RC:$src1,
5277                                        (_.ScalarIntMemFrags addr:$src2)))>,
5278                         Sched<[sched.Folded, sched.ReadAfterFold]>;
      // Scalar-register forms used only by the code generator.
5279  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5280  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5281                         (ins _.FRC:$src1, _.FRC:$src2),
5282                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5283                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5284                          Sched<[sched]> {
5285    let isCommutable = IsCommutable;
5286  }
5287  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5288                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5289                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5290                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5291                         (_.ScalarLdFrag addr:$src2)))]>,
5292                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5293  }
5294  }
5295}
5296
// Scalar FP binary operation with an explicit rounding-control operand.
// Defines one maskable record, rrb_Int, which takes AVX512RC:$rc as a third
// input and matches VecNode with (i32 timm:$rc); it is tagged EVEX_B and
// EVEX_RC.
// The record is marked Uses = [MXCSR]; mayRaiseFPException is not set in
// this multiclass.
// NOTE: the IsCommutable parameter is accepted but not referenced in the
// body.
5297multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5298                                  SDNode VecNode, X86FoldableSchedWrite sched,
5299                                  bit IsCommutable = 0> {
5300  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5301  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5302                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5303                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5304                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5305                          (i32 timm:$rc))>,
5306                          EVEX_B, EVEX_RC, Sched<[sched]>;
5307}
// Scalar FP binary operation with an additional {sae} register form.
// Defines five records:
//   rr_Int / rm_Int - maskable forms on _.RC matched from VecNode, tagged
//                     SIMD_EXC.
//   rr / rm         - isCodeGenOnly forms on _.FRC matched from OpNode,
//                     guarded by HasAVX512, with Uses = [MXCSR] and
//                     mayRaiseFPException = 1; each carries an
//                     EVEX2VEXOverride built from EVEX2VexOvrd plus
//                     "rr" / "rm".
//   rrb_Int         - maskable register form matched from SaeNode, printed
//                     with "{sae}", tagged EVEX_B, with Uses = [MXCSR] only.
// IsCommutable is applied to the FRC "rr" record only.
5308multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5309                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5310                                X86FoldableSchedWrite sched, bit IsCommutable,
5311                                string EVEX2VexOvrd> {
5312  let ExeDomain = _.ExeDomain in {
5313  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5314                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5315                           "$src2, $src1", "$src1, $src2",
5316                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5317                           Sched<[sched]>, SIMD_EXC;
5318
5319  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5320                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5321                         "$src2, $src1", "$src1, $src2",
5322                         (_.VT (VecNode _.RC:$src1,
5323                                        (_.ScalarIntMemFrags addr:$src2)))>,
5324                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5325
      // Scalar-register forms used only by the code generator.
5326  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5327      Uses = [MXCSR], mayRaiseFPException = 1 in {
5328  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5329                         (ins _.FRC:$src1, _.FRC:$src2),
5330                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5331                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5332                          Sched<[sched]>,
5333                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5334    let isCommutable = IsCommutable;
5335  }
5336  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5337                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5338                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5339                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5340                         (_.ScalarLdFrag addr:$src2)))]>,
5341                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5342                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5343  }
5344
      // {sae} register form; no memory variant is defined for it.
5345  let Uses = [MXCSR] in
5346  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5347                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5348                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5349                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5350                            EVEX_B, Sched<[sched]>;
5351  }
5352}
5353
// Scalar single- and double-precision variants of a binary FP operation
// that also has a rounding-control form.
//   SSZ - "ss" mnemonic suffix, f32x_info, sched.PS.Scl; tagged XS and
//         EVEX_CD8<32, CD8VT1>.
//   SDZ - "sd" mnemonic suffix, f64x_info, sched.PD.Scl; tagged XD, VEX_W
//         and EVEX_CD8<64, CD8VT1>.
// Each combines avx512_fp_scalar (OpNode / VecNode) with
// avx512_fp_scalar_round (RndNode).
5354multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5355                                SDNode VecNode, SDNode RndNode,
5356                                X86SchedWriteSizes sched, bit IsCommutable> {
5357  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5358                              sched.PS.Scl, IsCommutable>,
5359             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5360                              sched.PS.Scl, IsCommutable>,
5361                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5362  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5363                              sched.PD.Scl, IsCommutable>,
5364             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5365                              sched.PD.Scl, IsCommutable>,
5366                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5367}
5368
// Scalar single- and double-precision variants of a binary FP operation
// that has a {sae} form, built from avx512_fp_scalar_sae.
//   SSZ - "ss" mnemonic suffix, f32x_info, sched.PS.Scl.
//   SDZ - "sd" mnemonic suffix, f64x_info, sched.PD.Scl, VEX_W.
// The EVEX2VEX override prefix passed down is NAME#"SS" / NAME#"SD".
5369multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5370                              SDNode VecNode, SDNode SaeNode,
5371                              X86SchedWriteSizes sched, bit IsCommutable> {
5372  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5373                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5374                              NAME#"SS">,
5375                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5376  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5377                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5378                              NAME#"SD">,
5379                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5380}
// Scalar FP arithmetic instantiations.
// VADD / VMUL / VSUB / VDIV use the rounding-control multiclass; only VADD
// and VMUL pass IsCommutable = 1.
// VMIN / VMAX use the {sae} multiclass and pass IsCommutable = 0.
5381defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5382                                 SchedWriteFAddSizes, 1>;
5383defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5384                                 SchedWriteFMulSizes, 1>;
5385defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5386                                 SchedWriteFAddSizes, 0>;
5387defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5388                                 SchedWriteFDivSizes, 0>;
5389defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5390                               SchedWriteFCmpSizes, 0>;
5391defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5392                               SchedWriteFCmpSizes, 0>;
5393
5394// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5395// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Defines the isCodeGenOnly scalar-register (_.FRC) "rr" and "rm" records
// for a binary op matched from OpNode, guarded by HasAVX512.
//   rr - marked isCommutable = 1.
//   rm - folds a _.ScalarLdFrag load as the second operand; isCommutable is
//        not set on it here.
// Each record carries an EVEX2VEXOverride built from EVEX2VEXOvrd plus
// "rr" / "rm". No _Int or masked forms are defined by this multiclass.
5396multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5397                                    X86VectorVTInfo _, SDNode OpNode,
5398                                    X86FoldableSchedWrite sched,
5399                                    string EVEX2VEXOvrd> {
5400  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5401  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5402                         (ins _.FRC:$src1, _.FRC:$src2),
5403                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5404                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5405                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5406    let isCommutable = 1;
5407  }
5408  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5409                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5410                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5411                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5412                         (_.ScalarLdFrag addr:$src2)))]>,
5413                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5414                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5415  }
5416}
// Commutable scalar min/max instantiations, matched from X86fminc /
// X86fmaxc. They reuse the vmin/vmax opcodes (0x5D / 0x5F) and mnemonics
// ("vminss", "vminsd", "vmaxss", "vmaxsd"); all four use SchedWriteFCmp.Scl
// and are tagged SIMD_EXC.
5417defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5418                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5419                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5420
5421defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5422                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5423                                         VEX_W, EVEX_4V, VEX_LIG,
5424                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5425
5426defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5427                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5428                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5429
5430defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5431                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5432                                         VEX_W, EVEX_4V, VEX_LIG,
5433                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5434
// Packed FP binary operation for one vector type: register (rr), full
// memory (rm) and broadcast memory (rmb) forms, each built with
// AVX512_maskable_split.
//   OpNode        - first pattern operator handed to AVX512_maskable_split.
//   MaskOpNode    - second pattern operator handed to AVX512_maskable_split.
//                   NOTE(review): presumably OpNode is for the unmasked
//                   record and MaskOpNode for the masked ones -- confirm
//                   against AVX512_maskable_split.
//   _             - vector type info; _.Suffix is appended to OpcodeStr.
//   sched         - scheduling info; memory forms use sched.Folded and
//                   sched.ReadAfterFold.
//   IsCommutable  - forwarded for the rr form only.
//   IsKCommutable - forwarded (twice) for the rr form only; defaults to
//                   IsCommutable.
// All records are marked hasSideEffects = 0, Uses = [MXCSR],
// mayRaiseFPException = 1; the memory forms are also marked mayLoad = 1.
5435multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5436                            SDPatternOperator MaskOpNode,
5437                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5438                            bit IsCommutable,
5439                            bit IsKCommutable = IsCommutable> {
5440  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5441      Uses = [MXCSR], mayRaiseFPException = 1 in {
5442  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5443                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5444                  "$src2, $src1", "$src1, $src2",
5445                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5446                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5447                  IsKCommutable, IsKCommutable>,
5448                  EVEX_4V, Sched<[sched]>;
5449  let mayLoad = 1 in {
5450    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5451                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5452                    "$src2, $src1", "$src1, $src2",
5453                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5454                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5455                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
        // Broadcast form: scalar memory operand, printed with
        // _.BroadcastStr appended to ${src2}; tagged EVEX_B.
5456    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5457                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5458                     "${src2}"#_.BroadcastStr#", $src1",
5459                     "$src1, ${src2}"#_.BroadcastStr,
5460                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5461                     (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5462                     EVEX_4V, EVEX_B,
5463                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5464    }
5465  }
5466}
5467
// Packed FP binary operation with an explicit rounding-control operand.
// Defines one maskable register record, rrb, which takes AVX512RC:$rc as a
// third input and matches OpNodeRnd with (i32 timm:$rc); tagged EVEX_B and
// EVEX_RC. Marked Uses = [MXCSR]; mayRaiseFPException is not set here.
5468multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5469                                  SDPatternOperator OpNodeRnd,
5470                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5471  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5472  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5473                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
5474                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5475                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5476                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5477}
5478
// Packed FP binary operation with a {sae} register form.
// Defines one maskable register record, rrb, matched from OpNodeSAE and
// printed with "{sae}"; tagged EVEX_B (no EVEX_RC and no $rc operand).
// Marked Uses = [MXCSR]; mayRaiseFPException is not set here.
5479multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5480                                SDPatternOperator OpNodeSAE,
5481                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5482  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5483  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5484                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5485                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5486                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5487                  EVEX_4V, EVEX_B, Sched<[sched]>;
5488}
5489
// Packed FP binary operation for all PS/PD vector widths, built from
// avx512_fp_packed.
//   prd               - predicate guarding the 512-bit forms; the 128/256-bit
//                       forms require [prd, HasVLX].
//   sched             - per-size scheduling info (PS/PD x XMM/YMM/ZMM).
//   IsCommutable      - commutability passed to every width.
//   IsPD128Commutable - commutability passed as IsCommutable for PDZ128
//                       only; defaults to IsCommutable.
// PS forms are tagged PS with EVEX_CD8<32, CD8VF>; PD forms are tagged PD,
// VEX_W with EVEX_CD8<64, CD8VF>.
5490multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5491                             SDPatternOperator MaskOpNode,
5492                             Predicate prd, X86SchedWriteSizes sched,
5493                             bit IsCommutable = 0,
5494                             bit IsPD128Commutable = IsCommutable> {
5495  let Predicates = [prd] in {
5496  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5497                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5498                              EVEX_CD8<32, CD8VF>;
5499  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5500                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5501                              EVEX_CD8<64, CD8VF>;
5502  }
5503
5504    // Define only if AVX512VL feature is present.
5505  let Predicates = [prd, HasVLX] in {
5506    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5507                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5508                                   EVEX_CD8<32, CD8VF>;
5509    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5510                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5511                                   EVEX_CD8<32, CD8VF>;
        // PDZ128 is the only width that passes two commutability flags:
        // IsPD128Commutable as avx512_fp_packed's IsCommutable and
        // IsCommutable as its IsKCommutable.
5512    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5513                                   sched.PD.XMM, IsPD128Commutable,
5514                                   IsCommutable>, EVEX_V128, PD, VEX_W,
5515                                   EVEX_CD8<64, CD8VF>;
5516    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5517                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5518                                   EVEX_CD8<64, CD8VF>;
5519  }
5520}
5521
// Rounding-control forms of a packed FP binary operation. Only the 512-bit
// PS (v16f32_info) and PD (v8f64_info) variants are defined; no 128/256-bit
// variants are instantiated here.
5522let Uses = [MXCSR] in
5523multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5524                                   X86SchedWriteSizes sched> {
5525  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5526                                    v16f32_info>,
5527                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5528  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5529                                    v8f64_info>,
5530                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5531}
5532
// {sae} forms of a packed FP binary operation. Only the 512-bit PS
// (v16f32_info) and PD (v8f64_info) variants are defined.
// Note: the node parameter is named OpNodeRnd here, but it is forwarded to
// avx512_fp_sae_packed as that multiclass's OpNodeSAE argument.
5533let Uses = [MXCSR] in
5534multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5535                                 X86SchedWriteSizes sched> {
5536  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5537                                  v16f32_info>,
5538                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5539  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5540                                  v8f64_info>,
5541                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5542}
5543
// Packed FP arithmetic instantiations.
// VADD / VMUL / VSUB / VDIV pair the plain forms with rounding-control
// forms; VADD and VMUL pass IsCommutable = 1, VSUB and VDIV leave it at the
// default (0).
// VMIN / VMAX pair the plain forms with {sae} forms and pass
// IsCommutable = 0.
5544defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5545                              SchedWriteFAddSizes, 1>,
5546            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5547defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5548                              SchedWriteFMulSizes, 1>,
5549            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5550defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5551                              SchedWriteFAddSizes>,
5552            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5553defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5554                              SchedWriteFDivSizes>,
5555            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5556defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5557                              SchedWriteFCmpSizes, 0>,
5558            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5559defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5560                              SchedWriteFCmpSizes, 0>,
5561            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max variants: same opcodes and mnemonics as VMIN / VMAX,
// matched from X86fminc / X86fmaxc, marked isCodeGenOnly, with no {sae}
// forms.
5562let isCodeGenOnly = 1 in {
5563  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5564                                 SchedWriteFCmpSizes, 1>;
5565  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5566                                 SchedWriteFCmpSizes, 1>;
5567}
// FP-domain logic instructions. They are guarded by HasDQI, carry no
// selection pattern (null_frag for both nodes), and override the
// multiclass settings with an empty Uses list and mayRaiseFPException = 0.
// VANDN is the only one not marked commutable.
5568let Uses = []<Register>, mayRaiseFPException = 0 in {
5569defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5570                               SchedWriteFLogicSizes, 1>;
5571defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5572                               SchedWriteFLogicSizes, 0>;
5573defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5574                               SchedWriteFLogicSizes, 1>;
5575defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5576                               SchedWriteFLogicSizes, 1>;
5577}
5578
// Packed scalef-style binary operation for one vector type: maskable
// register (rr), full memory (rm) and broadcast memory (rmb) forms, all
// matched from OpNode. _.Suffix is appended to OpcodeStr.
// Every record is marked Uses = [MXCSR], mayRaiseFPException = 1. No
// commutability flag is passed to AVX512_maskable.
5579multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5580                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5581  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5582  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5583                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5584                  "$src2, $src1", "$src1, $src2",
5585                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5586                  EVEX_4V, Sched<[sched]>;
5587  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5588                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5589                  "$src2, $src1", "$src1, $src2",
5590                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5591                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
      // Broadcast form: scalar memory operand, tagged EVEX_B.
5592  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5593                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5594                   "${src2}"#_.BroadcastStr#", $src1",
5595                   "$src1, ${src2}"#_.BroadcastStr,
5596                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5597                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5598  }
5599}
5600
// Scalar scalef-style binary operation: maskable register (rr) and memory
// (rm) forms on the vector register class _.RC, matched from OpNode.
// _.Suffix is appended to OpcodeStr. The memory form uses
// _.IntScalarMemOp / _.ScalarIntMemFrags.
// Both records are marked Uses = [MXCSR], mayRaiseFPException = 1. No
// _.FRC (scalar register class) forms are defined here.
5601multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5602                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5603  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5604  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5605                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5606                  "$src2, $src1", "$src1, $src2",
5607                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5608                  Sched<[sched]>;
5609  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5610                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5611                  "$src2, $src1", "$src1, $src2",
5612                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5613                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5614  }
5615}
5616
// All packed and scalar variants of a scalef-style instruction.
//   opc       - opcode byte for the packed forms.
//   opcScaler - opcode byte for the scalar (SSZ / SDZ) forms.
//   OpcodeStr - mnemonic stem.
//   sched     - per-width scheduling info (XMM / YMM / ZMM / Scl); the same
//               entry is used for PS and PD at a given width.
// The 512-bit packed and the scalar variants also get rounding-control
// forms; the 128/256-bit variants do not.
// No Predicates are set in this multiclass for the 512-bit and scalar
// variants; only the 128/256-bit variants are wrapped in [HasVLX].
5617multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5618                                X86SchedWriteWidths sched> {
5619  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5620             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5621                              EVEX_V512, EVEX_CD8<32, CD8VF>;
5622  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5623             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5624                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
      // Scalar variants: avx512_fp_scalef_scalar appends _.Suffix to
      // OpcodeStr itself, whereas avx512_fp_scalar_round is handed the
      // mnemonic with "ss" / "sd" already appended.
5625  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5626             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5627                                    X86scalefsRnd, sched.Scl>,
5628                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5629  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5630             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5631                                    X86scalefsRnd, sched.Scl>,
5632                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5633
5634  // Define only if AVX512VL feature is present.
5635  let Predicates = [HasVLX] in {
5636    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5637                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
5638    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5639                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
5640    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5641                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5642    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5643                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5644  }
5645}
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D, scheduled with
// SchedWriteFAdd; tagged T8PD and NotEVEX2VEXConvertible.
5646defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5647                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5648
5649//===----------------------------------------------------------------------===//
5650// AVX-512  VPTESTM instructions
5651//===----------------------------------------------------------------------===//
5652
// VPTEST-style compare producing a mask register (_.KRC): register (rr) and
// full-memory (rm) forms built with AVX512_maskable_cmp.
// Both forms carry null_frag patterns (see the NOTE below), so hasSideEffects
// and mayLoad are set explicitly rather than derived from a pattern.
// The rr form passes a trailing `1` to AVX512_maskable_cmp that the rm form
// omits.
// NOTE(review): presumably that argument is the commutable flag -- confirm
// against AVX512_maskable_cmp.
// The Name parameter is accepted but not referenced in the body.
5653multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5654                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
5655                         string Name> {
5656  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5657  // There are just too many permutations due to commutability and bitcasts.
5658  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5659  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5660                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5661                      "$src2, $src1", "$src1, $src2",
5662                   (null_frag), (null_frag), 1>,
5663                   EVEX_4V, Sched<[sched]>;
5664  let mayLoad = 1 in
5665  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5666                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5667                       "$src2, $src1", "$src1, $src2",
5668                   (null_frag), (null_frag)>,
5669                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5670                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5671  }
5672}
5673
// Broadcast-memory form (rmb) of a VPTEST-style compare: takes a scalar
// memory operand printed with _.BroadcastStr, writes a mask register
// (_.KRC), and is tagged EVEX_B. It has null_frag patterns, with mayLoad = 1
// and hasSideEffects = 0 set explicitly.
5674multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5675                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5676  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5677  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5678                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5679                    "${src2}"#_.BroadcastStr#", $src1",
5680                    "$src1, ${src2}"#_.BroadcastStr,
5681                    (null_frag), (null_frag)>,
5682                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5683                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5684}
5685
// VPTEST-style compare for one dword/qword element type at all three vector
// widths. Each width combines avx512_vptest (rr/rm) with avx512_vptest_mb
// (rmb).
//   Z          - 512-bit, guarded by [HasAVX512].
//   Z256, Z128 - guarded by [HasAVX512, HasVLX].
// NAME is forwarded as avx512_vptest's Name argument.
5686multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5687                                  X86SchedWriteWidths sched,
5688                                  AVX512VLVectorVTInfo _> {
5689  let Predicates  = [HasAVX512] in
5690  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5691           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5692
5693  let Predicates = [HasAVX512, HasVLX] in {
5694  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5695              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5696  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5697              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5698  }
5699}
5700
// Dword and qword variants of a VPTEST-style compare.
//   D - "d" mnemonic suffix, avx512vl_i32_info.
//   Q - "q" mnemonic suffix, avx512vl_i64_info, tagged VEX_W.
5701multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5702                            X86SchedWriteWidths sched> {
5703  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5704                                 avx512vl_i32_info>;
5705  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5706                                 avx512vl_i64_info>, VEX_W;
5707}
5708
// Word and byte variants of a VPTEST-style compare at all three vector
// widths.
//   W* - "w" mnemonic suffix, tagged VEX_W.
//   B* - "b" mnemonic suffix.
// The 512-bit forms are guarded by [HasBWI]; the 128/256-bit forms by
// [HasVLX, HasBWI]. Only avx512_vptest (rr/rm) is used: no broadcast (rmb)
// forms are defined for word/byte elements.
5709multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5710                            X86SchedWriteWidths sched> {
5711  let Predicates = [HasBWI] in {
5712  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5713                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5714  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5715                            v64i8_info, NAME#"B">, EVEX_V512;
5716  }
5717  let Predicates = [HasVLX, HasBWI] in {
5718
5719  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5720                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5721  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5722                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5723  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5724                            v32i8x_info, NAME#"B">, EVEX_V256;
5725  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5726                            v16i8x_info, NAME#"B">, EVEX_V128;
5727  }
5728}
5729
// Convenience wrapper producing every element size of a VPTEST-style
// instruction: byte/word records use opc_wb, dword/qword records use opc_dq.
5730multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5731                                   X86SchedWriteWidths sched> :
5732  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5733  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5734
// VPTESTM and VPTESTNM share the same opcode bytes (0x26 for b/w, 0x27 for
// d/q) and the same scheduling class; the two definitions differ only in the
// mnemonic and in the prefix class applied (T8PD vs. T8XS).
5735defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5736                                         SchedWriteVecLogic>, T8PD;
5737defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5738                                         SchedWriteVecLogic>, T8XS;
5739
5740//===----------------------------------------------------------------------===//
5741// AVX-512  Shift instructions
5742//===----------------------------------------------------------------------===//
5743
// Maskable "vector source, 8-bit immediate" instruction pair.
//   opc       - opcode byte used by both forms.
//   ImmFormR  - instruction format of the register-source form (ri).
//   ImmFormM  - instruction format of the memory-source form (mi).
//   OpNode    - DAG node matched as (OpNode src, (i8 timm)).
//   sched     - scheduling class; the memory form uses sched.Folded.
//   _         - vector type description (register class, VT, load fragment).
5744multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5745                            string OpcodeStr, SDNode OpNode,
5746                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5747  let ExeDomain = _.ExeDomain in {
  // Register source.
5748  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5749                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5750                      "$src2, $src1", "$src1, $src2",
5751                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5752                   Sched<[sched]>;
  // Full-width memory source, loaded through _.LdFrag.
5753  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5754                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5755                       "$src2, $src1", "$src1, $src2",
5756                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5757                          (i8 timm:$src2)))>,
5758                   Sched<[sched.Folded]>;
5759  }
5760}
5761
// Embedded-broadcast companion of the immediate forms: the source is a single
// scalar memory element (_.ScalarMemOp) matched through _.BroadcastLdFrag, the
// assembly operand carries _.BroadcastStr, and the record is tagged EVEX_B.
5762multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5763                             string OpcodeStr, SDNode OpNode,
5764                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5765  let ExeDomain = _.ExeDomain in
5766  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5767                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5768      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5769     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5770     EVEX_B, Sched<[sched.Folded]>;
5771}
5772
// Maskable shift whose second operand is a 128-bit value regardless of the
// width of the data vector: a VR128X register (rr) or an i128mem operand
// loaded with a plain `load` (rm).
//   SrcVT - value type given to the 128-bit second operand in the pattern.
//   _     - type description of the data vector / result.
5773multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5774                            X86FoldableSchedWrite sched, ValueType SrcVT,
5775                            X86VectorVTInfo _> {
5776   // src2 is always 128-bit
5777  let ExeDomain = _.ExeDomain in {
5778  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5779                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5780                      "$src2, $src1", "$src1, $src2",
5781                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5782                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
5783  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5784                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5785                       "$src2, $src1", "$src1, $src2",
5786                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5787                   AVX512BIBase,
5788                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5789  }
5790}
5791
// Instantiate avx512_shift_rrm for the 512/256/128-bit widths of one element
// type.  `prd` guards the 512-bit records; the narrower records additionally
// require HasVLX.
// The EVEX_CD8 tuple kind differs per width (CD8VQ, CD8VH, CD8VF).
// NOTE(review): presumably this keeps the compressed-displacement scale tied
// to the fixed 128-bit memory operand at every vector width -- confirm against
// the EVEX_CD8 definition.
5792multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5793                              X86SchedWriteWidths sched, ValueType SrcVT,
5794                              AVX512VLVectorVTInfo VTInfo,
5795                              Predicate prd> {
5796  let Predicates = [prd] in
5797  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5798                               VTInfo.info512>, EVEX_V512,
5799                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5800  let Predicates = [prd, HasVLX] in {
5801  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5802                               VTInfo.info256>, EVEX_V256,
5803                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5804  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5805                               VTInfo.info128>, EVEX_V128,
5806                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5807  }
5808}
5809
// Dword, qword and word flavours of a shift-by-128-bit-count instruction.
//   opcd/opcq/opcw          - opcode byte for the d, q and w element sizes.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible; the
//                             `let` has no braces, so it covers the Q defm
//                             only.
// D and Q are guarded by HasAVX512, W by HasBWI.  Only Q is tagged VEX_W.
5810multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5811                              string OpcodeStr, SDNode OpNode,
5812                              X86SchedWriteWidths sched,
5813                              bit NotEVEX2VEXConvertibleQ = 0> {
5814  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5815                              avx512vl_i32_info, HasAVX512>;
5816  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5817  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5818                              avx512vl_i64_info, HasAVX512>, VEX_W;
5819  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5820                              avx512vl_i16_info, HasBWI>;
5821}
5822
// Instantiate the immediate forms (avx512_shift_rmi: ri/mi) together with the
// broadcast form (avx512_shift_rmbi: mbi) for the 512/256/128-bit widths of
// one element type.  512-bit records need HasAVX512; the narrower ones also
// need HasVLX.
5823multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5824                                  string OpcodeStr, SDNode OpNode,
5825                                  X86SchedWriteWidths sched,
5826                                  AVX512VLVectorVTInfo VTInfo> {
5827  let Predicates = [HasAVX512] in
5828  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5829                              sched.ZMM, VTInfo.info512>,
5830             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5831                               VTInfo.info512>, EVEX_V512;
5832  let Predicates = [HasAVX512, HasVLX] in {
5833  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5834                              sched.YMM, VTInfo.info256>,
5835             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5836                               VTInfo.info256>, EVEX_V256;
5837  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5838                              sched.XMM, VTInfo.info128>,
5839             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5840                               VTInfo.info128>, EVEX_V128;
5841  }
5842}
5843
// Word-element immediate forms.  Only the ri/mi pair from avx512_shift_rmi is
// instantiated (no broadcast form).  Records are guarded by HasBWI (plus
// HasVLX for 256/128-bit) and tagged VEX_WIG.  OpcodeStr is used as given; no
// element suffix is appended here.
5844multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5845                              string OpcodeStr, SDNode OpNode,
5846                              X86SchedWriteWidths sched> {
5847  let Predicates = [HasBWI] in
5848  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5849                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5850  let Predicates = [HasVLX, HasBWI] in {
5851  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5852                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5853  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5854                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5855  }
5856}
5857
// Dword and qword immediate forms.  "d"/"q" is appended to the mnemonic, the
// matching EVEX_CD8 element size (32 or 64) is attached, and the qword flavour
// is tagged VEX_W.  NotEVEX2VEXConvertibleQ is applied to the Q defm only
// (brace-less `let`).
5858multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5859                               Format ImmFormR, Format ImmFormM,
5860                               string OpcodeStr, SDNode OpNode,
5861                               X86SchedWriteWidths sched,
5862                               bit NotEVEX2VEXConvertibleQ = 0> {
5863  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5864                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5865  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5866  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5867                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5868}
5869
// Shift-by-immediate instructions.  The d/q forms come from
// avx512_shift_rmi_dq and the w forms from avx512_shift_rmi_w; the individual
// operations are selected by the MRMnr/MRMnm formats passed in.
5870defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5871                                 SchedWriteVecShiftImm>,
5872             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5873                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5874
5875defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5876                                 SchedWriteVecShiftImm>,
5877             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5878                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5879
// VPSRA uses opcode 0x72 for both d and q, and passes 1 so that the q records
// get notEVEX2VEXConvertible = 1.
5880defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5881                                 SchedWriteVecShiftImm, 1>,
5882             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5883                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5884
// Rotate-by-immediate: d/q only, no word forms are defined.
5885defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5886                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5887defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5888                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5889
// Shift-by-128-bit-count instructions (d, q and w opcodes respectively).  These
// add further records under the same VPSLL/VPSRA/VPSRL defm names as the
// immediate forms.  VPSRA again marks its q records notEVEX2VEXConvertible.
5890defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5891                                SchedWriteVecShift>;
5892defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5893                                SchedWriteVecShift, 1>;
5894defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5895                                SchedWriteVecShift>;
5896
5897// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the narrow source into the low part of an otherwise
// undefined (IMPLICIT_DEF) v8i64 register, runs the 512-bit instruction, and
// extracts the same sub-register from the result.  The shift count operand
// (128-bit register or immediate) is passed through unchanged.
5898let Predicates = [HasAVX512, NoVLX] in {
  // Shift count in a 128-bit register.
5899  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5900            (EXTRACT_SUBREG (v8i64
5901              (VPSRAQZrr
5902                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5903                 VR128X:$src2)), sub_ymm)>;
5904
5905  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5906            (EXTRACT_SUBREG (v8i64
5907              (VPSRAQZrr
5908                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5909                 VR128X:$src2)), sub_xmm)>;
5910
  // Shift count as an 8-bit immediate.
5911  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5912            (EXTRACT_SUBREG (v8i64
5913              (VPSRAQZri
5914                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5915                 timm:$src2)), sub_ymm)>;
5916
5917  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5918            (EXTRACT_SUBREG (v8i64
5919              (VPSRAQZri
5920                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5921                 timm:$src2)), sub_xmm)>;
5922}
5923
5924//===-------------------------------------------------------------------===//
5925// Variable Bit Shifts
5926//===-------------------------------------------------------------------===//
5927
// Maskable two-source vector operation where both sources have the same vector
// type as the result: register/register (rr) and register/memory (rm, second
// source loaded through _.LdFrag).  Despite the name it is also instantiated
// for permute and shuffle instructions elsewhere in this file.
5928multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5929                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5930  let ExeDomain = _.ExeDomain in {
5931  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5932                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5933                      "$src2, $src1", "$src1, $src2",
5934                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5935                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
5936  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5937                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5938                       "$src2, $src1", "$src1, $src2",
5939                   (_.VT (OpNode _.RC:$src1,
5940                   (_.VT (_.LdFrag addr:$src2))))>,
5941                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5942                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5943  }
5944}
5945
// Embedded-broadcast form (rmb) matching avx512_var_shift: the second source
// is one scalar memory element matched through _.BroadcastLdFrag, printed with
// _.BroadcastStr, and the record is tagged EVEX_B.
5946multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5947                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5948  let ExeDomain = _.ExeDomain in
5949  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5950                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5951                    "${src2}"#_.BroadcastStr#", $src1",
5952                    "$src1, ${src2}"#_.BroadcastStr,
5953                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5954                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5955                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5956}
5957
// Instantiate rr/rm (avx512_var_shift) and rmb (avx512_var_shift_mb) for the
// 512/256/128-bit widths of one element type.  512-bit records need
// HasAVX512; the narrower ones also need HasVLX.
5958multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5959                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5960  let Predicates  = [HasAVX512] in
5961  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5962           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5963
5964  let Predicates = [HasAVX512, HasVLX] in {
5965  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5966              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5967  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5968              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5969  }
5970}
5971
// Dword ("d") and qword ("q") flavours of a per-element variable shift/rotate.
// Both use the same opcode; the qword flavour is tagged VEX_W.
5972multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5973                                  SDNode OpNode, X86SchedWriteWidths sched> {
5974  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5975                                 avx512vl_i32_info>;
5976  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5977                                 avx512vl_i64_info>, VEX_W;
5978}
5979
5980// Use 512bit version to implement 128/256 bit in case NoVLX.
//   _         - type bundle supplying the 128/256/512-bit value types.
//   OpcodeStr - here an instruction *record name* prefix (e.g. "VPSRAVQ"), not
//               a mnemonic: "Zrr" is appended and the result is looked up with
//               !cast<Instruction>.
//   OpNode    - DAG node to match at the narrow width.
//   p         - predicate list under which the patterns are active.
// Both operands are inserted into undefined 512-bit registers, and the low
// sub-register of the 512-bit result is extracted.
5981multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5982                                     SDNode OpNode, list<Predicate> p> {
5983  let Predicates = p in {
  // 256-bit operation via the 512-bit instruction.
5984  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5985                                  (_.info256.VT _.info256.RC:$src2))),
5986            (EXTRACT_SUBREG
5987                (!cast<Instruction>(OpcodeStr#"Zrr")
5988                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5989                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5990             sub_ymm)>;
5991
  // 128-bit operation via the 512-bit instruction.
5992  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5993                                  (_.info128.VT _.info128.RC:$src2))),
5994            (EXTRACT_SUBREG
5995                (!cast<Instruction>(OpcodeStr#"Zrr")
5996                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5997                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5998             sub_xmm)>;
5999  }
6000}
// Word-element variable shifts.  Only rr/rm from avx512_var_shift are created
// (no broadcast form).  Records are guarded by HasBWI (plus HasVLX for
// 256/128-bit) and tagged VEX_W.  OpcodeStr is used as given.
6001multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6002                              SDNode OpNode, X86SchedWriteWidths sched> {
6003  let Predicates = [HasBWI] in
6004  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6005              EVEX_V512, VEX_W;
6006  let Predicates = [HasVLX, HasBWI] in {
6007
6008  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6009              EVEX_V256, VEX_W;
6010  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6011              EVEX_V128, VEX_W;
6012  }
6013}
6014
// Per-element variable shifts: d/q forms share one opcode each, the w forms
// use a separate opcode.
6015defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6016              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6017
6018defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6019              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6020
6021defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6022              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6023
// Per-element variable rotates match the generic rotr/rotl nodes; d/q only.
6024defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6025defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6026
// Narrow-width fallbacks through the 512-bit instruction when VLX is absent:
// qword arithmetic right shift, and all three word shifts (which need BWI).
6027defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6028defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6029defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6030defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6031
6032
6033// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Narrow operands are inserted into undefined (IMPLICIT_DEF) 512-bit registers
// and the matching low sub-register of the 512-bit result is extracted.
6034let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-left, 64-bit elements.
6035  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6036            (EXTRACT_SUBREG (v8i64
6037              (VPROLVQZrr
6038                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6039                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6040                       sub_xmm)>;
6041  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6042            (EXTRACT_SUBREG (v8i64
6043              (VPROLVQZrr
6044                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6045                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6046                       sub_ymm)>;
6047
  // Variable rotate-left, 32-bit elements.
6048  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6049            (EXTRACT_SUBREG (v16i32
6050              (VPROLVDZrr
6051                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6052                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6053                        sub_xmm)>;
6054  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6055            (EXTRACT_SUBREG (v16i32
6056              (VPROLVDZrr
6057                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6058                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6059                        sub_ymm)>;
6060
  // Rotate-left by immediate, 64-bit elements.
6061  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6062            (EXTRACT_SUBREG (v8i64
6063              (VPROLQZri
6064                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6065                        timm:$src2)), sub_xmm)>;
6066  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6067            (EXTRACT_SUBREG (v8i64
6068              (VPROLQZri
6069                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6070                       timm:$src2)), sub_ymm)>;
6071
  // Rotate-left by immediate, 32-bit elements.
6072  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6073            (EXTRACT_SUBREG (v16i32
6074              (VPROLDZri
6075                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6076                        timm:$src2)), sub_xmm)>;
6077  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6078            (EXTRACT_SUBREG (v16i32
6079              (VPROLDZri
6080                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6081                        timm:$src2)), sub_ymm)>;
6082}
6083
6084// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Narrow operands are inserted into undefined (IMPLICIT_DEF) 512-bit registers
// and the matching low sub-register of the 512-bit result is extracted.
6085let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-right, 64-bit elements.
6086  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6087            (EXTRACT_SUBREG (v8i64
6088              (VPRORVQZrr
6089                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6090                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6091                       sub_xmm)>;
6092  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6093            (EXTRACT_SUBREG (v8i64
6094              (VPRORVQZrr
6095                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6096                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6097                       sub_ymm)>;
6098
  // Variable rotate-right, 32-bit elements.
6099  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6100            (EXTRACT_SUBREG (v16i32
6101              (VPRORVDZrr
6102                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6103                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6104                        sub_xmm)>;
6105  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6106            (EXTRACT_SUBREG (v16i32
6107              (VPRORVDZrr
6108                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6109                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6110                        sub_ymm)>;
6111
  // Rotate-right by immediate, 64-bit elements.
6112  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6113            (EXTRACT_SUBREG (v8i64
6114              (VPRORQZri
6115                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6116                        timm:$src2)), sub_xmm)>;
6117  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6118            (EXTRACT_SUBREG (v8i64
6119              (VPRORQZri
6120                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6121                       timm:$src2)), sub_ymm)>;
6122
  // Rotate-right by immediate, 32-bit elements.
6123  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6124            (EXTRACT_SUBREG (v16i32
6125              (VPRORDZri
6126                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6127                        timm:$src2)), sub_xmm)>;
6128  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6129            (EXTRACT_SUBREG (v16i32
6130              (VPRORDZri
6131                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6132                        timm:$src2)), sub_ymm)>;
6133}
6134
6135//===-------------------------------------------------------------------===//
6136// 1-src variable permutation VPERMW/D/Q
6137//===-------------------------------------------------------------------===//
6138
// Variable permute for dword/qword-sized elements, built by reusing the
// avx512_var_shift (rr/rm) and avx512_var_shift_mb (rmb) multiclasses.
// Only 512-bit and 256-bit records are created; there is no Z128 record.
// `sched` is a single X86FoldableSchedWrite that is used for both widths.
6139multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6140                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6141  let Predicates  = [HasAVX512] in
6142  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6143           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6144
6145  let Predicates = [HasAVX512, HasVLX] in
6146  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6147              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6148}
6149
// Permute-by-immediate, built by reusing avx512_shift_rmi (ri/mi) and
// avx512_shift_rmbi (mbi).  Only 512-bit and 256-bit records are created;
// there is no Z128 record.  `sched` is a single X86FoldableSchedWrite used
// for both widths.
6150multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6151                                 string OpcodeStr, SDNode OpNode,
6152                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6153  let Predicates = [HasAVX512] in
6154  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6155                              sched, VTInfo.info512>,
6156             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6157                               sched, VTInfo.info512>, EVEX_V512;
6158  let Predicates = [HasAVX512, HasVLX] in
6159  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6160                              sched, VTInfo.info256>,
6161             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6162                               sched, VTInfo.info256>, EVEX_V256;
6163}
6164
// Variable permute for byte/word elements.  Uses only avx512_var_shift
// (rr/rm, no broadcast form) and, unlike the dword/qword permutes, creates
// all three widths including Z128.  `prd` is the feature predicate for the
// element size; HasVLX is added for the 256/128-bit records.
6165multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6166                              Predicate prd, SDNode OpNode,
6167                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6168  let Predicates = [prd] in
6169  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6170              EVEX_V512 ;
6171  let Predicates = [HasVLX, prd] in {
6172  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6173              EVEX_V256 ;
6174  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6175              EVEX_V128 ;
6176  }
6177}
6178
// Byte/word variable permutes share opcode 0x8D; VPERMW is tagged VEX_W and
// guarded by HasBWI, VPERMB is guarded by HasVBMI.
6179defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6180                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6181defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6182                               WriteVarShuffle256, avx512vl_i8_info>;
6183
// Dword/qword and float/double variable permutes.  Each pair shares an opcode
// (0x36 integer, 0x16 floating point); the 64-bit-element member is tagged
// VEX_W.
6184defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6185                                    WriteVarShuffle256, avx512vl_i32_info>;
6186defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6187                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6188defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6189                                     WriteFVarShuffle256, avx512vl_f32_info>;
6190defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6191                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6192
// Immediate-controlled VPERMQ/VPERMPD; these add ri/mi/mbi records under the
// same defm names as the variable forms above.
6193defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6194                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6195                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6196defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6197                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6198                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6199
6200//===----------------------------------------------------------------------===//
6201// AVX-512 - VPERMIL
6202//===----------------------------------------------------------------------===//
6203
// Variable-control VPERMIL forms.
//   OpcVar - opcode of the variable-control instruction.
//   _      - type description of the data vector / result.
//   Ctrl   - type description of the control vector (second source).
// Three maskable records are produced: register control (rr), full-width
// memory control (rm) and broadcast memory control (rmb).  In rmb the operand
// class and assembly broadcast string come from `_` while the matched load
// fragment comes from `Ctrl`.
// NOTE(review): this relies on `_` and `Ctrl` having the same element size and
// count at every instantiation -- confirm against callers.
// No ExeDomain is set here; any ExeDomain comes from an enclosing `let`.
6204multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6205                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6206                             X86VectorVTInfo Ctrl> {
6207  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6208                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6209                  "$src2, $src1", "$src1, $src2",
6210                  (_.VT (OpNode _.RC:$src1,
6211                               (Ctrl.VT Ctrl.RC:$src2)))>,
6212                  T8PD, EVEX_4V, Sched<[sched]>;
6213  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6214                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6215                  "$src2, $src1", "$src1, $src2",
6216                  (_.VT (OpNode
6217                           _.RC:$src1,
6218                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6219                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6220                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6221  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6222                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6223                   "${src2}"#_.BroadcastStr#", $src1",
6224                   "$src1, ${src2}"#_.BroadcastStr,
6225                   (_.VT (OpNode
6226                            _.RC:$src1,
6227                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6228                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6229                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6230}
6231
// Instantiate the variable-control VPERMIL forms for 512/128/256-bit vectors,
// always matching the X86VPermilpv node.  The 512-bit records need HasAVX512;
// the narrower ones also need HasVLX.
6232multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6233                                    X86SchedWriteWidths sched,
6234                                    AVX512VLVectorVTInfo _,
6235                                    AVX512VLVectorVTInfo Ctrl> {
6236  let Predicates = [HasAVX512] in {
6237    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6238                                  _.info512, Ctrl.info512>, EVEX_V512;
6239  }
6240  let Predicates = [HasAVX512, HasVLX] in {
6241    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6242                                  _.info128, Ctrl.info128>, EVEX_V128;
6243    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6244                                  _.info256, Ctrl.info256>, EVEX_V256;
6245  }
6246}
6247
// Complete VPERMIL family for one element type.  Both defms use NAME, so all
// records are emitted under the caller's defm name:
//   - variable-control forms (opcode OpcVar, X86VPermilpv) via
//     avx512_permil_vec_common;
//   - immediate-control forms (opcode OpcImm, X86VPermilpi) via
//     avx512_shift_rmi_sizes.
6248multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6249                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6250  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6251                                      _, Ctrl>;
6252  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6253                                    X86VPermilpi, SchedWriteFShuffle, _>,
6254                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6255}
6256
// VPERMILPS/VPERMILPD: the data vectors are f32/f64 and the control vectors
// are the same-width integer types.  The execution domain is set explicitly
// for each family; VPERMILPD is additionally tagged VEX_W1X.
6257let ExeDomain = SSEPackedSingle in
6258defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6259                               avx512vl_i32_info>;
6260let ExeDomain = SSEPackedDouble in
6261defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6262                               avx512vl_i64_info>, VEX_W1X;
6263
6264//===----------------------------------------------------------------------===//
6265// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6266//===----------------------------------------------------------------------===//
6267
// All three shuffles use opcode 0x70 and are told apart by their prefix base
// class (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base).  VPSHUFD reuses
// the dword immediate multiclass (including the broadcast form); the word
// shuffles reuse avx512_shift_rmi_w, which has no broadcast form.
6268defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6269                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6270                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6271defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6272                                  X86PShufhw, SchedWriteShuffle>,
6273                                  EVEX, AVX512XSIi8Base;
6274defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6275                                  X86PShuflw, SchedWriteShuffle>,
6276                                  EVEX, AVX512XDIi8Base;
6277
6278//===----------------------------------------------------------------------===//
6279// AVX-512 - VPSHUFB
6280//===----------------------------------------------------------------------===//
6281
// Byte shuffle at all three widths, built from avx512_var_shift (rr/rm only,
// no broadcast form).  Guarded by HasBWI, plus HasVLX for 256/128-bit.
6282multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6283                               X86SchedWriteWidths sched> {
6284  let Predicates = [HasBWI] in
6285  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6286                              EVEX_V512;
6287
6288  let Predicates = [HasVLX, HasBWI] in {
6289  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6290                              EVEX_V256;
6291  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6292                              EVEX_V128;
6293  }
6294}
6295
// VPSHUFB: opcode 0x00, matches X86pshufb, tagged VEX_WIG.
6296defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6297                                  SchedWriteVarShuffle>, VEX_WIG;
6298
6299//===----------------------------------------------------------------------===//
6300// Move Low to High and High to Low packed FP Instructions
6301//===----------------------------------------------------------------------===//
6302
// Register-register VMOVLHPS (opcode 0x16) and VMOVHLPS (opcode 0x12) on
// VR128X, selected from X86Movlhps / X86Movhlps on v4f32.
// Only VMOVHLPSZrr is marked isCommutable (the `let` binds to the single def
// that follows it) and NotMemoryFoldable.
6303def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6304          (ins VR128X:$src1, VR128X:$src2),
6305          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6306          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6307          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6308let isCommutable = 1 in
6309def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6310          (ins VR128X:$src1, VR128X:$src2),
6311          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6312          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6313          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6314
6315//===----------------------------------------------------------------------===//
6316// VMOVHPS/PD VMOVLPS Instructions
6317// All patterns were taken from the SSE implementation.
6318//===----------------------------------------------------------------------===//
6319
// Defines a single load-form record, `rm`, taking a register $src1 and a
// 64-bit memory operand (f64mem) $src2.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand list is appended here.
//   OpNode    - node applied to $src1 and the loaded value; callers may pass
//               null_frag.
//   _         - vector type info supplying RC, VT and ExeDomain.
// The loaded operand is matched as an f64 load (loadf64) placed into a v2f64
// via scalar_to_vector and then bitconverted to _.VT.
6320multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6321                                  SDPatternOperator OpNode,
6322                                  X86VectorVTInfo _> {
6323  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6324  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6325                  (ins _.RC:$src1, f64mem:$src2),
6326                  !strconcat(OpcodeStr,
6327                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6328                  [(set _.RC:$dst,
6329                     (OpNode _.RC:$src1,
6330                       (_.VT (bitconvert
6331                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6332                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6333}
6334
6335// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6336// SSE1. And MOVLPS pattern is even more complex.
// Accordingly the PS variants below pass null_frag as OpNode, while the PD
// variants use X86Unpckl (VMOVHPD) and X86Movsd (VMOVLPD) and add VEX_W.
// High-half moves use opcode 0x16, low-half moves use opcode 0x12.
6337defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6338                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6339defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6340                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6341defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6342                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6343defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6344                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6345
// Extra selection patterns: when the second operand of X86Unpckl / X86Movsd
// is an X86vzload64 of an address, select the VMOVHPD / VMOVLPD load form.
6346let Predicates = [HasAVX512] in {
6347  // VMOVHPD patterns
6348  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6349            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6350
6351  // VMOVLPD patterns
6352  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6353            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6354}
6355
// Store forms of VMOVHPS/VMOVHPD (opcode 0x17) and VMOVLPS/VMOVLPD (opcode
// 0x13); all four write an f64mem destination from a VR128X source and are
// scheduled as WriteFStore.
// Each brace-less `let mayStore = 1, hasSideEffects = 0 in` binds only to the
// def that immediately follows it, i.e. the pattern-less PS forms. The PD
// forms carry an explicit store pattern instead.
6356let SchedRW = [WriteFStore] in {
6357let mayStore = 1, hasSideEffects = 0 in
6358def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6359                       (ins f64mem:$dst, VR128X:$src),
6360                       "vmovhps\t{$src, $dst|$dst, $src}",
6361                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of (X86Unpckh $src, $src).
6362def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6363                       (ins f64mem:$dst, VR128X:$src),
6364                       "vmovhpd\t{$src, $dst|$dst, $src}",
6365                       [(store (f64 (extractelt
6366                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6367                                     (iPTR 0))), addr:$dst)]>,
6368                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6369let mayStore = 1, hasSideEffects = 0 in
6370def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6371                       (ins f64mem:$dst, VR128X:$src),
6372                       "vmovlps\t{$src, $dst|$dst, $src}",
6373                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of $src viewed as v2f64.
6374def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6375                       (ins f64mem:$dst, VR128X:$src),
6376                       "vmovlpd\t{$src, $dst|$dst, $src}",
6377                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6378                                     (iPTR 0))), addr:$dst)]>,
6379                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6380} // SchedRW
6381
// Additional VMOVHPD store selection: a store of element 0 of
// (X86VPermilpi $src, 1) is also selected as VMOVHPDZ128mr.
6382let Predicates = [HasAVX512] in {
6383  // VMOVHPD patterns
6384  def : Pat<(store (f64 (extractelt
6385                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6386                           (iPTR 0))), addr:$dst),
6387           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6388}
6389//===----------------------------------------------------------------------===//
6390// FMA - Fused Multiply Operations
6391//
6392
// Packed FMA, 213 operand form. Defines three variants through
// AVX512_maskable_fma, all with $src1 tied to $dst:
//   r  - register $src3.
//   m  - full-vector memory $src3, loaded with _.LdFrag.
//   mb - scalar memory $src3, loaded with _.BroadcastLdFrag, marked EVEX_B.
// Every pattern uses the operand order ($src2, $src1, $src3). Two dags are
// passed per variant, one built from OpNode and one from MaskOpNode
// (presumably unmasked vs. masked selection - confirm in AVX512_maskable_fma).
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are presumably
// commutability flags of AVX512_maskable_fma - confirm against its definition.
// NOTE(review): the Suff parameter is not referenced in this body.
6393multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6394                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6395                               X86VectorVTInfo _, string Suff> {
6396  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6397      Uses = [MXCSR], mayRaiseFPException = 1 in {
6398  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6399          (ins _.RC:$src2, _.RC:$src3),
6400          OpcodeStr, "$src3, $src2", "$src2, $src3",
6401          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6402          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6403          AVX512FMA3Base, Sched<[sched]>;
6404
6405  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6406          (ins _.RC:$src2, _.MemOp:$src3),
6407          OpcodeStr, "$src3, $src2", "$src2, $src3",
6408          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6409          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6410          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6411
6412  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6413            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6414            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6415            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6416            (OpNode _.RC:$src2,
6417             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6418            (MaskOpNode _.RC:$src2,
6419             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6420            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6421  }
6422}
6423
// Packed FMA, 213 form with an explicit rounding-control operand: defines the
// register-only `rb` variant, which takes an extra AVX512RC:$rc operand and
// matches OpNode with ($src2, $src1, $src3, timm $rc). The same dag is passed
// for both pattern slots. Marked EVEX_B and EVEX_RC.
// Unlike the non-rounding variants visible in this file, the enclosing `let`
// does not set mayRaiseFPException.
// NOTE(review): the Suff parameter is not referenced in this body.
6424multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6425                                 X86FoldableSchedWrite sched,
6426                                 X86VectorVTInfo _, string Suff> {
6427  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6428      Uses = [MXCSR] in
6429  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6430          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6431          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6432          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6433          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6434          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6435}
6436
// Expands the 213 packed FMA over the three vector widths of `_`:
//   Z    - info512, r/m/mb plus the rounding `rb` variant, requires HasAVX512.
//   Z256 - info256, r/m/mb only, requires HasVLX and HasAVX512.
//   Z128 - info128, r/m/mb only, requires HasVLX and HasAVX512.
// Only the 512-bit width instantiates avx512_fma3_213_round (with OpNodeRnd).
// EVEX_CD8 uses each width's own EltSize with CD8VF.
6437multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6438                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6439                                   X86SchedWriteWidths sched,
6440                                   AVX512VLVectorVTInfo _, string Suff> {
6441  let Predicates = [HasAVX512] in {
6442    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6443                                      sched.ZMM, _.info512, Suff>,
6444                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6445                                        _.info512, Suff>,
6446                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6447  }
6448  let Predicates = [HasVLX, HasAVX512] in {
6449    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6450                                    sched.YMM, _.info256, Suff>,
6451                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6452    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6453                                    sched.XMM, _.info128, Suff>,
6454                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6455  }
6456}
6457
// Instantiates the 213 packed FMA for both element types: PS
// (avx512vl_f32_info, mnemonic suffix "ps") and PD (avx512vl_f64_info,
// mnemonic suffix "pd", plus VEX_W). Both use SchedWriteFMA.
6458multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6459                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6460    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6461                                      OpNodeRnd, SchedWriteFMA,
6462                                      avx512vl_f32_info, "PS">;
6463    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6464                                      OpNodeRnd, SchedWriteFMA,
6465                                      avx512vl_f64_info, "PD">, VEX_W;
6466}
6467
// 213-form packed FMA instructions. Arguments per line: opcode, mnemonic stem,
// OpNode, MaskOpNode, OpNodeRnd. VFMADDSUB/VFMSUBADD pass the same node for
// OpNode and MaskOpNode; the others pair an any_*/X86any_* node with its
// plain counterpart.
6468defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6469                                       fma, X86FmaddRnd>;
6470defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6471                                       X86Fmsub, X86FmsubRnd>;
6472defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6473                                       X86Fmaddsub, X86FmaddsubRnd>;
6474defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6475                                       X86Fmsubadd, X86FmsubaddRnd>;
6476defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6477                                       X86Fnmadd, X86FnmaddRnd>;
6478defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6479                                       X86Fnmsub, X86FnmsubRnd>;
6480
6481
// Packed FMA, 231 operand form. Same r / m / mb variants as the 213 form, with
// $src1 tied to $dst, but the patterns use the operand order
// ($src2, $src3, $src1).
// The register variant passes null_frag in the first pattern slot, so only
// its MaskOpNode dag is supplied; the memory variants supply both dags.
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are presumably
// commutability flags of AVX512_maskable_fma - confirm against its definition.
// NOTE(review): the Suff parameter is not referenced in this body.
6482multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6483                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6484                               X86VectorVTInfo _, string Suff> {
6485  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6486      Uses = [MXCSR], mayRaiseFPException = 1 in {
6487  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6488          (ins _.RC:$src2, _.RC:$src3),
6489          OpcodeStr, "$src3, $src2", "$src2, $src3",
6490          (null_frag),
6491          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6492          AVX512FMA3Base, Sched<[sched]>;
6493
6494  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6495          (ins _.RC:$src2, _.MemOp:$src3),
6496          OpcodeStr, "$src3, $src2", "$src2, $src3",
6497          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6498          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6499          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6500
6501  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6502         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6503         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6504         "$src2, ${src3}"#_.BroadcastStr,
6505         (_.VT (OpNode _.RC:$src2,
6506                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6507                      _.RC:$src1)),
6508         (_.VT (MaskOpNode _.RC:$src2,
6509                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6510                           _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6511         Sched<[sched.Folded, sched.ReadAfterFold]>;
6512  }
6513}
6514
// Packed FMA, 231 form with rounding control: defines the register-only `rb`
// variant with an extra AVX512RC:$rc operand. The first pattern slot is
// null_frag; the second matches OpNode with ($src2, $src3, $src1, timm $rc).
// Marked EVEX_B and EVEX_RC.
// NOTE(review): the Suff parameter is not referenced in this body.
6515multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6516                                 X86FoldableSchedWrite sched,
6517                                 X86VectorVTInfo _, string Suff> {
6518  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6519      Uses = [MXCSR] in
6520  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6521          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6522          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6523          (null_frag),
6524          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6525          1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6526}
6527
// Expands the 231 packed FMA over the three vector widths of `_`:
//   Z    - info512, r/m/mb plus the rounding `rb` variant, requires HasAVX512.
//   Z256 - info256, r/m/mb only, requires HasVLX and HasAVX512.
//   Z128 - info128, r/m/mb only, requires HasVLX and HasAVX512.
// Only the 512-bit width instantiates avx512_fma3_231_round (with OpNodeRnd).
6528multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6529                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6530                                   X86SchedWriteWidths sched,
6531                                   AVX512VLVectorVTInfo _, string Suff> {
6532  let Predicates = [HasAVX512] in {
6533    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6534                                      sched.ZMM, _.info512, Suff>,
6535                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6536                                        _.info512, Suff>,
6537                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6538  }
6539  let Predicates = [HasVLX, HasAVX512] in {
6540    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6541                                    sched.YMM, _.info256, Suff>,
6542                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6543    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6544                                    sched.XMM, _.info128, Suff>,
6545                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6546  }
6547}
6548
// Instantiates the 231 packed FMA for both element types: PS
// (avx512vl_f32_info, mnemonic suffix "ps") and PD (avx512vl_f64_info,
// mnemonic suffix "pd", plus VEX_W). Both use SchedWriteFMA.
6549multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6550                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6551    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6552                                      OpNodeRnd, SchedWriteFMA,
6553                                      avx512vl_f32_info, "PS">;
6554    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6555                                      OpNodeRnd, SchedWriteFMA,
6556                                      avx512vl_f64_info, "PD">, VEX_W;
6557}
6558
// 231-form packed FMA instructions. Arguments per line: opcode, mnemonic stem,
// OpNode, MaskOpNode, OpNodeRnd. Each opcode here is the corresponding
// 213-form opcode plus 0x10.
6559defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6560                                       fma, X86FmaddRnd>;
6561defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6562                                       X86Fmsub, X86FmsubRnd>;
6563defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6564                                       X86Fmaddsub, X86FmaddsubRnd>;
6565defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6566                                       X86Fmsubadd, X86FmsubaddRnd>;
6567defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6568                                       X86Fnmadd, X86FnmaddRnd>;
6569defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6570                                       X86Fnmsub, X86FnmsubRnd>;
6571
// Packed FMA, 132 operand form. Same r / m / mb variants as the 213 and 231
// forms, with $src1 tied to $dst.
// The register variant passes null_frag in the first pattern slot and matches
// MaskOpNode with ($src1, $src3, $src2). The memory variants place the load
// first: (load $src3, $src1, $src2).
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are presumably
// commutability flags of AVX512_maskable_fma - confirm against its definition.
// NOTE(review): the Suff parameter is not referenced in this body.
6572multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6573                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6574                               X86VectorVTInfo _, string Suff> {
6575  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6576      Uses = [MXCSR], mayRaiseFPException = 1 in {
6577  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6578          (ins _.RC:$src2, _.RC:$src3),
6579          OpcodeStr, "$src3, $src2", "$src2, $src3",
6580          (null_frag),
6581          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6582          AVX512FMA3Base, Sched<[sched]>;
6583
6584  // Pattern is in 312 order so that the load is in a different place from the
6585  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6586  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6587          (ins _.RC:$src2, _.MemOp:$src3),
6588          OpcodeStr, "$src3, $src2", "$src2, $src3",
6589          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6590          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6591          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6592
6593  // Pattern is in 312 order so that the load is in a different place from the
6594  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6595  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6596         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6597         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6598         "$src2, ${src3}"#_.BroadcastStr,
6599         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6600                       _.RC:$src1, _.RC:$src2)),
6601         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6602                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6603         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6604  }
6605}
6606
// Packed FMA, 132 form with rounding control: defines the register-only `rb`
// variant with an extra AVX512RC:$rc operand. The first pattern slot is
// null_frag; the second matches OpNode with ($src1, $src3, $src2, timm $rc).
// Marked EVEX_B and EVEX_RC.
// NOTE(review): the Suff parameter is not referenced in this body.
6607multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6608                                 X86FoldableSchedWrite sched,
6609                                 X86VectorVTInfo _, string Suff> {
6610  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6611      Uses = [MXCSR] in
6612  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6613          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6614          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6615          (null_frag),
6616          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6617          1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6618}
6619
// Expands the 132 packed FMA over the three vector widths of `_`:
//   Z    - info512, r/m/mb plus the rounding `rb` variant, requires HasAVX512.
//   Z256 - info256, r/m/mb only, requires HasVLX and HasAVX512.
//   Z128 - info128, r/m/mb only, requires HasVLX and HasAVX512.
// Only the 512-bit width instantiates avx512_fma3_132_round (with OpNodeRnd).
6620multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6621                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6622                                   X86SchedWriteWidths sched,
6623                                   AVX512VLVectorVTInfo _, string Suff> {
6624  let Predicates = [HasAVX512] in {
6625    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6626                                      sched.ZMM, _.info512, Suff>,
6627                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6628                                        _.info512, Suff>,
6629                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6630  }
6631  let Predicates = [HasVLX, HasAVX512] in {
6632    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6633                                    sched.YMM, _.info256, Suff>,
6634                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6635    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6636                                    sched.XMM, _.info128, Suff>,
6637                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6638  }
6639}
6640
// Instantiates the 132 packed FMA for both element types: PS
// (avx512vl_f32_info, mnemonic suffix "ps") and PD (avx512vl_f64_info,
// mnemonic suffix "pd", plus VEX_W). Both use SchedWriteFMA.
6641multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6642                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6643    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6644                                      OpNodeRnd, SchedWriteFMA,
6645                                      avx512vl_f32_info, "PS">;
6646    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6647                                      OpNodeRnd, SchedWriteFMA,
6648                                      avx512vl_f64_info, "PD">, VEX_W;
6649}
6650
// 132-form packed FMA instructions. Arguments per line: opcode, mnemonic stem,
// OpNode, MaskOpNode, OpNodeRnd. Each opcode here is the corresponding
// 213-form opcode minus 0x10.
6651defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6652                                       fma, X86FmaddRnd>;
6653defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6654                                       X86Fmsub, X86FmsubRnd>;
6655defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6656                                       X86Fmaddsub, X86FmaddsubRnd>;
6657defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6658                                       X86Fmsubadd, X86FmsubaddRnd>;
6659defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6660                                       X86Fnmadd, X86FnmaddRnd>;
6661defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6662                                       X86Fnmsub, X86FnmsubRnd>;
6663
6664// Scalar FMA
// Common body for one scalar FMA operand form. Defines six records, all with
// $src1 tied to $dst:
//   r_Int / m_Int / rb_Int - vector-register (_.RC) forms built through
//       AVX512_maskable_3src_scalar with null_frag, i.e. no selection pattern
//       is attached here.
//   r / m / rb - scalar-register (_.FRC) forms, isCodeGenOnly and
//       isCommutable, carrying the caller-supplied patterns.
// Parameters:
//   RHS_r, RHS_m, RHS_b - complete pattern dags for the r, m and rb records.
//   MaskOnlyReg - when set, the r and rb records get an empty pattern list;
//       the m record always uses RHS_m.
// The rb / rb_Int records take an extra AVX512RC:$rc operand, are marked
// EVEX_B and EVEX_RC, and use `Uses = [MXCSR]` instead of SIMD_EXC.
6665multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6666                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6667let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6668  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6669          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6670          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6671          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6672
6673  let mayLoad = 1 in
6674  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6675          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6676          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6677          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6678
6679  let Uses = [MXCSR] in
6680  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6681         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6682         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6683         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6684
6685  let isCodeGenOnly = 1, isCommutable = 1 in {
6686    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6687                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6688                     !strconcat(OpcodeStr,
6689                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6690                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6691    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6692                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6693                    !strconcat(OpcodeStr,
6694                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6695                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6696
6697    let Uses = [MXCSR] in
6698    def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6699                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6700                     !strconcat(OpcodeStr,
6701                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6702                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6703                     Sched<[SchedWriteFMA.Scl]>;
6704  }// isCodeGenOnly = 1
6705}// Constraints = "$src1 = $dst"
6706}
6707
// Instantiates avx512_fma3s_common once per operand form (213, 231, 132) for
// one scalar type. Record names are NAME # form # SUFF # "Z"; mnemonics are
// OpcodeStr # form # _.Suffix.
// Operand order in the patterns passed for each form:
//   213: ($src2, $src1, $src3) for reg, mem and rounding; MaskOnlyReg = 0.
//   231: ($src2, $src3, $src1) for reg, mem and rounding; MaskOnlyReg = 1.
//   132: ($src1, $src3, $src2) for reg and rounding, (load $src3, $src1,
//        $src2) for mem; MaskOnlyReg = 1.
// With MaskOnlyReg = 1 the reg and rounding dags are not attached to the
// r / rb records by avx512_fma3s_common.
6708multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6709                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6710                            X86VectorVTInfo _, string SUFF> {
6711  let ExeDomain = _.ExeDomain in {
6712  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6713                // Operands for intrinsic are in 123 order to preserve passthru
6714                // semantics.
6715                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6716                         _.FRC:$src3))),
6717                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6718                         (_.ScalarLdFrag addr:$src3)))),
6719                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6720                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
6721
6722  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6723                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6724                                          _.FRC:$src1))),
6725                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6726                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6727                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6728                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
6729
6730  // One pattern is in 312 order so that the load is in a different place from
6731  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6732  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6733                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6734                         _.FRC:$src2))),
6735                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6736                                 _.FRC:$src1, _.FRC:$src2))),
6737                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6738                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
6739  }
6740}
6741
// Instantiates the scalar FMA family for both scalar types under HasAVX512:
//   SS - f32x_info, EVEX_CD8<32, CD8VT1>, VEX_LIG.
//   SD - f64x_info, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W.
// The three opcodes are forwarded for the 213, 231 and 132 forms respectively.
6742multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6743                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6744  let Predicates = [HasAVX512] in {
6745    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6746                                 OpNodeRnd, f32x_info, "SS">,
6747                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6748    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6749                                 OpNodeRnd, f64x_info, "SD">,
6750                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6751  }
6752}
6753
// Scalar FMA instructions. Arguments per line: the 213, 231 and 132 opcodes,
// the mnemonic stem, OpNode and OpNodeRnd.
6754defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6755defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6756defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6757defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6758
6759multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6760                                      SDNode RndOp, string Prefix,
6761                                      string Suffix, SDNode Move,
6762                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
6763  let Predicates = [HasAVX512] in {
6764    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6765                (Op _.FRC:$src2,
6766                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6767                    _.FRC:$src3))))),
6768              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6769               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6770               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6771
6772    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6773                (Op _.FRC:$src2, _.FRC:$src3,
6774                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6775              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6776               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6777               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6778
6779    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6780                (Op _.FRC:$src2,
6781                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6782                    (_.ScalarLdFrag addr:$src3)))))),
6783              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6784               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6785               addr:$src3)>;
6786
6787    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6788                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6789                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6790              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6791               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6792               addr:$src3)>;
6793
6794    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6795                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6796                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6797              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6798               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6799               addr:$src3)>;
6800
6801    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6802               (X86selects_mask VK1WM:$mask,
6803                (MaskedOp _.FRC:$src2,
6804                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6805                    _.FRC:$src3),
6806                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6807              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6808               VR128X:$src1, VK1WM:$mask,
6809               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6810               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6811
6812    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6813               (X86selects_mask VK1WM:$mask,
6814                (MaskedOp _.FRC:$src2,
6815                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6816                    (_.ScalarLdFrag addr:$src3)),
6817                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6818              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6819               VR128X:$src1, VK1WM:$mask,
6820               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6821
6822    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6823               (X86selects_mask VK1WM:$mask,
6824                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6825                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6826                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6827              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6828               VR128X:$src1, VK1WM:$mask,
6829               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6830
6831    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6832               (X86selects_mask VK1WM:$mask,
6833                (MaskedOp _.FRC:$src2, _.FRC:$src3,
6834                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6835                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6836              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6837               VR128X:$src1, VK1WM:$mask,
6838               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6839               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6840
6841    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6842               (X86selects_mask VK1WM:$mask,
6843                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6844                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6845                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6846              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6847               VR128X:$src1, VK1WM:$mask,
6848               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6849
6850    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851               (X86selects_mask VK1WM:$mask,
6852                (MaskedOp _.FRC:$src2,
6853                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6854                          _.FRC:$src3),
6855                (_.EltVT ZeroFP)))))),
6856              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6857               VR128X:$src1, VK1WM:$mask,
6858               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6859               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6860
6861    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6862               (X86selects_mask VK1WM:$mask,
6863                (MaskedOp _.FRC:$src2, _.FRC:$src3,
6864                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6865                (_.EltVT ZeroFP)))))),
6866              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6867               VR128X:$src1, VK1WM:$mask,
6868               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6869               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6870
6871    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6872               (X86selects_mask VK1WM:$mask,
6873                (MaskedOp _.FRC:$src2,
6874                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6875                          (_.ScalarLdFrag addr:$src3)),
6876                (_.EltVT ZeroFP)))))),
6877              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6878               VR128X:$src1, VK1WM:$mask,
6879               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6880
6881    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6882               (X86selects_mask VK1WM:$mask,
6883                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6884                          _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6885                (_.EltVT ZeroFP)))))),
6886              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6887               VR128X:$src1, VK1WM:$mask,
6888               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6889
6890    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6891               (X86selects_mask VK1WM:$mask,
6892                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6893                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6894                (_.EltVT ZeroFP)))))),
6895              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6896               VR128X:$src1, VK1WM:$mask,
6897               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6898
6899    // Patterns with rounding mode.
6900    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6901                (RndOp _.FRC:$src2,
6902                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6903                       _.FRC:$src3, (i32 timm:$rc)))))),
6904              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6905               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6906               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6907
6908    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6909                (RndOp _.FRC:$src2, _.FRC:$src3,
6910                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6911                       (i32 timm:$rc)))))),
6912              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6913               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6914               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6915
6916    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6917               (X86selects_mask VK1WM:$mask,
6918                (RndOp _.FRC:$src2,
6919                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6920                       _.FRC:$src3, (i32 timm:$rc)),
6921                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6922              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6923               VR128X:$src1, VK1WM:$mask,
6924               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6925               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6926
6927    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6928               (X86selects_mask VK1WM:$mask,
6929                (RndOp _.FRC:$src2, _.FRC:$src3,
6930                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6931                       (i32 timm:$rc)),
6932                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6933              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6934               VR128X:$src1, VK1WM:$mask,
6935               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6936               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6937
6938    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6939               (X86selects_mask VK1WM:$mask,
6940                (RndOp _.FRC:$src2,
6941                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                       _.FRC:$src3, (i32 timm:$rc)),
6943                (_.EltVT ZeroFP)))))),
6944              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6945               VR128X:$src1, VK1WM:$mask,
6946               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6947               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6948
6949    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6950               (X86selects_mask VK1WM:$mask,
6951                (RndOp _.FRC:$src2, _.FRC:$src3,
6952                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6953                       (i32 timm:$rc)),
6954                (_.EltVT ZeroFP)))))),
6955              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6956               VR128X:$src1, VK1WM:$mask,
6957               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6958               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6959  }
6960}
6961
// Instantiate the scalar FMA selection patterns for each of the four FMA
// flavours (VFMADD, VFMSUB, VFNMADD, VFNMSUB).
// Single-precision forms: "SS" suffix, X86Movss, v4f32x_info, fp32imm0.
6962defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
6963                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6964defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6965                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6966defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6967                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6968defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6969                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6970
// Double-precision forms: "SD" suffix, X86Movsd, v2f64x_info, fp64imm0.
6971defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
6972                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
6973defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6974                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
6975defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6976                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
6977defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6978                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
6979
6980//===----------------------------------------------------------------------===//
6981// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6982//===----------------------------------------------------------------------===//
// avx512_pmadd52_rm: defines the register (r), full-vector memory (m) and
// broadcast memory (mb) forms of a three-source instruction through
// AVX512_maskable_3src. $src1 is tied to $dst by the enclosing Constraints,
// so it acts as both the accumulator input and the result.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode matched as (OpNode $src2, $src3, $src1).
//   sched     - scheduling info; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info supplying RC, VT, MemOp, LdFrag, etc.
6983let Constraints = "$src1 = $dst" in {
6984multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6985                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6986  // NOTE: The SDNode has the multiply operands first with the add last.
6987  // This enables commuted load patterns to be autogenerated by tablegen.
6988  let ExeDomain = _.ExeDomain in {
      // Register-register form.
      // NOTE(review): the trailing `1, 1` arguments presumably mark the node
      // as commutable - confirm against AVX512_maskable_3src.
6989  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6990          (ins _.RC:$src2, _.RC:$src3),
6991          OpcodeStr, "$src3, $src2", "$src2, $src3",
6992          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6993         AVX512FMA3Base, Sched<[sched]>;
6994
      // Memory form: $src3 is loaded with _.LdFrag.
6995  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6996          (ins _.RC:$src2, _.MemOp:$src3),
6997          OpcodeStr, "$src3, $src2", "$src2, $src3",
6998          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6999          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
7000
      // Broadcast memory form: $src3 is a scalar memory operand loaded with
      // _.BroadcastLdFrag; the asm string appends _.BroadcastStr and the
      // record carries EVEX_B.
7001  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7002            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7003            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7004            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7005            (OpNode _.RC:$src2,
7006                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7007                    _.RC:$src1)>,
7008            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7009  }
7010}
7011} // Constraints = "$src1 = $dst"
7012
// avx512_pmadd52_common: instantiates avx512_pmadd52_rm for the 512-, 256-
// and 128-bit vector infos of `_`. The 512-bit form (Z) requires HasIFMA;
// the 256/128-bit forms (Z256, Z128) additionally require HasVLX. Each width
// uses the matching member of `sched` (ZMM/YMM/XMM) and an EVEX_CD8 based on
// that width's element size.
7013multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7014                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7015  let Predicates = [HasIFMA] in {
7016    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7017                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7018  }
7019  let Predicates = [HasVLX, HasIFMA] in {
7020    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7021                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7022    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7023                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7024  }
7025}
7026
// vpmadd52luq (opcode 0xb4, node x86vpmadd52l) and vpmadd52huq (opcode 0xb5,
// node x86vpmadd52h), both over the i64 vector types with VEX_W and the
// SchedWriteVecIMul scheduling class.
7027defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7028                                         SchedWriteVecIMul, avx512vl_i64_info>,
7029                                         VEX_W;
7030defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7031                                         SchedWriteVecIMul, avx512vl_i64_info>,
7032                                         VEX_W;
7033
7034//===----------------------------------------------------------------------===//
7035// AVX-512  Scalar convert from signed/unsigned integer to float/double
7036//===----------------------------------------------------------------------===//
7037
// avx512_vcvtsi: scalar integer-to-floating-point conversion instructions
// without an explicit rounding operand.
//   opc      - opcode byte.
//   OpNode   - node matched by the *_Int forms.
//   sched    - scheduling info (sched.Folded / sched.ReadAfterFold for loads).
//   SrcRC    - integer source register class.
//   DstVT    - destination vector type info (supplies FRC, RC, VT, ExeDomain).
//   x86memop - memory operand used by the load forms.
//   ld_frag  - load fragment matched by rm_Int.
//   asm      - mnemonic without the leading "v" that the alias prepends.
//   mem      - size suffix; optional ("{mem}") in the memory asm strings and
//              mandatory in the alias mnemonic.
//   _Uses    - implicit register uses, defaults to [MXCSR].
//   _mayRaiseFPException - defaults to 1.
7038multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7039                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7040                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7041                    string mem, list<Register> _Uses = [MXCSR],
7042                    bit _mayRaiseFPException = 1> {
7043let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7044    mayRaiseFPException = _mayRaiseFPException in {
      // Codegen-only forms on the scalar FP register class (DstVT.FRC). They
      // carry no pattern of their own ([]).
7045  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7046    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7047              (ins DstVT.FRC:$src1, SrcRC:$src),
7048              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7049              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7050    let mayLoad = 1 in
7051      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7052              (ins DstVT.FRC:$src1, x86memop:$src),
7053              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7054              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7055  } // hasSideEffects = 0
      // Vector-register (DstVT.RC) forms matched through OpNode.
7056  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7057                (ins DstVT.RC:$src1, SrcRC:$src2),
7058                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7059                [(set DstVT.RC:$dst,
7060                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7061               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7062
7063  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7064                (ins DstVT.RC:$src1, x86memop:$src2),
7065                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7066                [(set DstVT.RC:$dst,
7067                      (OpNode (DstVT.VT DstVT.RC:$src1),
7068                               (ld_frag addr:$src2)))]>,
7069                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7070}
      // AT&T-only alias: "v" # asm # mem maps onto the register form rr_Int.
7071  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7072                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7073                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7074}
7075
// avx512_vcvtsi_round: register form of the scalar integer-to-FP conversion
// that takes an explicit rounding-control operand (AVX512RC:$rc). The pattern
// passes $rc to OpNode as an i32 target immediate, and the record carries
// EVEX_B and EVEX_RC. An AT&T-only alias with the `mem` suffix appended to
// the mnemonic maps onto the same instruction.
7076multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7077                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7078                               X86VectorVTInfo DstVT, string asm,
7079                               string mem> {
7080  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7081  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7082              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7083              !strconcat(asm,
7084                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7085              [(set DstVT.RC:$dst,
7086                    (OpNode (DstVT.VT DstVT.RC:$src1),
7087                             SrcRC:$src2,
7088                             (i32 timm:$rc)))]>,
7089              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7090  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7091                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7092                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7093}
7094
// avx512_vcvtsi_common: combines the rounding-control form
// (avx512_vcvtsi_round, using OpNodeRnd) with the plain register/memory forms
// (avx512_vcvtsi, using OpNode) under the same NAME, and marks them VEX_LIG.
7095multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7096                                X86FoldableSchedWrite sched,
7097                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7098                                X86MemOperand x86memop, PatFrag ld_frag,
7099                                string asm, string mem> {
7100  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7101              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7102                            ld_frag, asm, mem>, VEX_LIG;
7103}
7104
// Signed (opcode 0x2A) and unsigned (opcode 0x7B) scalar integer-to-FP
// conversions, their suffix-less AT&T aliases, and the scalar selection
// patterns. Everything in this block requires HasAVX512.
7105let Predicates = [HasAVX512] in {
7106defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7107                                 WriteCvtI2SS, GR32,
7108                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7109                                 XS, EVEX_CD8<32, CD8VT1>;
7110defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7111                                 WriteCvtI2SS, GR64,
7112                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7113                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// The i32 -> f64 form uses avx512_vcvtsi directly: no rounding-control
// variant, null_frag as the node, empty Uses and mayRaiseFPException = 0.
// NOTE(review): presumably because i32 -> f64 conversion is exact - confirm.
7114defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7115                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7116                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7117defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7118                                 WriteCvtI2SD, GR64,
7119                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7120                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7121
// Suffix-less AT&T mnemonics with a memory operand map to the i32mem forms.
7122def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7123              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7124def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7125              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7126
// Scalar signed int-to-fp from memory; $src1 is supplied as IMPLICIT_DEF.
7127def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7128          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7129def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7130          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7131def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7132          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7133def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7134          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7135
// Scalar signed int-to-fp from a general-purpose register.
7136def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7137          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7138def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7139          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7140def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7141          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7142def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7143          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7144
// Unsigned counterparts (opcode 0x7B), laid out like the signed definitions.
7145defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7146                                  WriteCvtI2SS, GR32,
7147                                  v4f32x_info, i32mem, loadi32,
7148                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7149defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7150                                  WriteCvtI2SS, GR64,
7151                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7152                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7153defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7154                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7155                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7156defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7157                                  WriteCvtI2SD, GR64,
7158                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7159                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7160
7161def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7162              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7163def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7164              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7165
// Scalar unsigned int-to-fp from memory.
7166def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7167          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7168def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7169          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7170def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7171          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7172def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7173          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7174
// Scalar unsigned int-to-fp from a general-purpose register.
7175def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7176          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7177def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7178          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7179def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7180          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7181def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7182          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7183}
7184
7185//===----------------------------------------------------------------------===//
7186// AVX-512  Scalar convert from float/double to integer
7187//===----------------------------------------------------------------------===//
7188
// avx512_cvt_s_int_round: scalar FP-to-integer conversions that operate on
// the vector register class of SrcVT. Defines, under HasAVX512:
//   rr_Int  - register source, matched with OpNode.
//   rrb_Int - register source plus rounding-control operand (AVX512RC:$rc),
//             matched with OpNodeRnd; carries EVEX_B and EVEX_RC.
//   rm_Int  - memory source, matched with OpNode over SrcVT.ScalarIntMemFrags.
// Three AT&T-only aliases ("v" # asm # aliasStr) map onto those instructions.
7189multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7190                                  X86VectorVTInfo DstVT, SDNode OpNode,
7191                                  SDNode OpNodeRnd,
7192                                  X86FoldableSchedWrite sched, string asm,
7193                                  string aliasStr> {
7194  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7195    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7196                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7197                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7198                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
        // Unlike rr_Int and rm_Int, this form lists only Uses = [MXCSR] and
        // does not add SIMD_EXC.
7199    let Uses = [MXCSR] in
7200    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7201                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7202                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7203                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7204                 Sched<[sched]>;
7205    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7206                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7207                [(set DstVT.RC:$dst, (OpNode
7208                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7209                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7210  } // Predicates = [HasAVX512]
7211
7212  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7213          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7214  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7215          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7216  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7217          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7218                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7219}
7220
7221// Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si / X86cvts2siRnd;
// unsigned conversions use opcode 0x79 with X86cvts2usi / X86cvts2usiRnd.
// The 64-bit integer destinations (i64x_info) add VEX_W.
7222defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7223                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7224                                   XS, EVEX_CD8<32, CD8VT1>;
7225defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7226                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7227                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7228defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7229                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7230                                   XS, EVEX_CD8<32, CD8VT1>;
7231defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7232                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7233                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7234defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7235                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7236                                   XD, EVEX_CD8<64, CD8VT1>;
7237defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7238                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7239                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7240defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7241                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7242                                   XD, EVEX_CD8<64, CD8VT1>;
7243defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7244                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7245                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7246
// avx512_cvt_s: codegen-only scalar FP-to-integer conversions on the scalar
// FP register class (SrcVT.FRC), under HasAVX512.
//   rr - register source, matched as (OpNode SrcVT.FRC:$src).
//   rm - memory source, matched as (OpNode (SrcVT.ScalarLdFrag addr:$src)).
// Note: the aliasStr parameter is accepted but not referenced in this body.
7247multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7248                        X86VectorVTInfo DstVT, SDNode OpNode,
7249                        X86FoldableSchedWrite sched,
7250                        string aliasStr> {
7251  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7252    let isCodeGenOnly = 1 in {
7253    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7254                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7255                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7256                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7257    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7258                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7259                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7260                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7261    }
7262  } // Predicates = [HasAVX512]
7263}
7264
// Codegen-only rr/rm forms that select lrint (i32x_info destinations) and
// llrint (i64x_info destinations, with VEX_W) to vcvtss2si / vcvtsd2si.
7265defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7266                       lrint, WriteCvtSS2I,
7267                       "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7268defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7269                       llrint, WriteCvtSS2I,
7270                       "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7271defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7272                       lrint, WriteCvtSD2I,
7273                       "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7274defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7275                       llrint, WriteCvtSD2I,
7276                       "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7277
// lrint producing an i64 result is selected to the 64-bit destination
// instructions (VCVTSS2SI64Z / VCVTSD2SI64Z), for register and memory sources.
7278let Predicates = [HasAVX512] in {
7279  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7280  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7281
7282  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7283  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7284}
7285
7286// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7287// which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of an
// int-to-fp conversion and selects the corresponding *_Int instruction with
// $dst as its first source, for GR32/GR64 register and loadi32/loadi64 memory
// sources.
7288let Predicates = [HasAVX512] in {
// Signed conversions to f32.
7289def : Pat<(v4f32 (X86Movss
7290                   (v4f32 VR128X:$dst),
7291                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7292          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7293
7294def : Pat<(v4f32 (X86Movss
7295                   (v4f32 VR128X:$dst),
7296                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7297          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7298
7299def : Pat<(v4f32 (X86Movss
7300                   (v4f32 VR128X:$dst),
7301                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7302          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7303
7304def : Pat<(v4f32 (X86Movss
7305                   (v4f32 VR128X:$dst),
7306                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7307          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7308
// Signed conversions to f64.
7309def : Pat<(v2f64 (X86Movsd
7310                   (v2f64 VR128X:$dst),
7311                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7312          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7313
7314def : Pat<(v2f64 (X86Movsd
7315                   (v2f64 VR128X:$dst),
7316                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7317          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7318
7319def : Pat<(v2f64 (X86Movsd
7320                   (v2f64 VR128X:$dst),
7321                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7322          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7323
7324def : Pat<(v2f64 (X86Movsd
7325                   (v2f64 VR128X:$dst),
7326                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7327          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7328
// Unsigned conversions to f32.
7329def : Pat<(v4f32 (X86Movss
7330                   (v4f32 VR128X:$dst),
7331                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7332          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7333
7334def : Pat<(v4f32 (X86Movss
7335                   (v4f32 VR128X:$dst),
7336                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7337          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7338
7339def : Pat<(v4f32 (X86Movss
7340                   (v4f32 VR128X:$dst),
7341                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7342          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7343
7344def : Pat<(v4f32 (X86Movss
7345                   (v4f32 VR128X:$dst),
7346                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7347          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7348
// Unsigned conversions to f64.
7349def : Pat<(v2f64 (X86Movsd
7350                   (v2f64 VR128X:$dst),
7351                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7352          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7353
7354def : Pat<(v2f64 (X86Movsd
7355                   (v2f64 VR128X:$dst),
7356                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7357          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7358
7359def : Pat<(v2f64 (X86Movsd
7360                   (v2f64 VR128X:$dst),
7361                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7362          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7363
7364def : Pat<(v2f64 (X86Movsd
7365                   (v2f64 VR128X:$dst),
7366                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7367          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7368} // Predicates = [HasAVX512]
7369
7370// Convert float/double to signed/unsigned int 32/64 with truncation
// avx512_cvt_s_all defines, under HasAVX512:
//   rr, rm  - codegen-only forms on the scalar FP register class / scalar
//             load, matched with OpNode.
//   rr_Int  - vector-register source, matched with OpNodeInt.
//   rrb_Int - vector-register source printed with {sae}, matched with
//             OpNodeSAE; carries EVEX_B.
//   rm_Int  - memory source, matched with OpNodeInt over ScalarIntMemFrags.
// The AT&T-only aliases use asm # aliasStr as the mnemonic (no "v" is
// prepended here).
7371multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7372                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7373                            SDNode OpNodeInt, SDNode OpNodeSAE,
7374                            X86FoldableSchedWrite sched, string aliasStr>{
7375let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
7376  let isCodeGenOnly = 1 in {
7377  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7378              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7379              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7380              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7381  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7382              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7383              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7384              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7385  }
7386
7387  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7388            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7389           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7390           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      // The {sae} form lists only Uses = [MXCSR] and does not add SIMD_EXC.
7391  let Uses = [MXCSR] in
7392  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7393            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7394            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7395                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7396  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7397              (ins _SrcRC.IntScalarMemOp:$src),
7398              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7399              [(set _DstRC.RC:$dst,
7400                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7401              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7402} //HasAVX512
7403
7404  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7405          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7406  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7407          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7408  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7409          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7410                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7411}
7412
// Truncating scalar FP -> signed integer conversions (opcode 0x2C, selected
// from any_fp_to_sint). The i64x_info destinations add VEX_W and use the
// "{q}" alias suffix; the i32x_info destinations use "{l}".
7413defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7414                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7415                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7416defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7417                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7418                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7419defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7420                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7421                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7422defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7423                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7424                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7425
// Truncating scalar FP -> unsigned integer conversions (opcode 0x78, selected
// from any_fp_to_uint), with the same "{l}" / "{q}" + VEX_W split as above.
7426defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7427                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7428                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7429defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7430                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7431                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7432defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7433                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7434                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7435defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7436                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7437                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7438
7439//===----------------------------------------------------------------------===//
7440// AVX-512  Convert from float to double and back
7441//===----------------------------------------------------------------------===//
7442
// Scalar FP <-> FP conversion, register and memory forms.
//   rr_Int, rm_Int - built through AVX512_maskable_scalar on the vector
//                    register class _.RC ($src1) and _Src.RC / an
//                    IntScalarMemOp ($src2), selected from OpNode.
//   rr, rm         - isCodeGenOnly forms on FRC operands with an empty
//                    pattern list ([]), so they are not selected from here.
// The enclosing let gives every record Uses = [MXCSR] and
// mayRaiseFPException = 1.
7443let Uses = [MXCSR], mayRaiseFPException = 1 in
7444multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7445                                X86VectorVTInfo _Src, SDNode OpNode,
7446                                X86FoldableSchedWrite sched> {
7447  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7448                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7449                         "$src2, $src1", "$src1, $src2",
7450                         (_.VT (OpNode (_.VT _.RC:$src1),
7451                                       (_Src.VT _Src.RC:$src2)))>,
7452                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7453  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7454                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7455                         "$src2, $src1", "$src1, $src2",
7456                         (_.VT (OpNode (_.VT _.RC:$src1),
7457                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7458                         EVEX_4V, VEX_LIG,
7459                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7460
  // Pattern-less forms: hasSideEffects / mayLoad are spelled out explicitly
  // because there is no pattern in these records to derive them from.
7461  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7462    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7463               (ins _.FRC:$src1, _Src.FRC:$src2),
7464               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7465               EVEX_4V, VEX_LIG, Sched<[sched]>;
7466    let mayLoad = 1 in
7467    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7468               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7469               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7470               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7471  }
7472}
7473
7474// Scalar Conversion with SAE - suppress all exceptions
// Defines a single register form, rrb_Int, written with a {sae} operand and
// encoded with EVEX_B. It is selected from OpNodeSAE and only sets
// Uses = [MXCSR] (no mayRaiseFPException is set here).
7475multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7476                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7477                                    X86FoldableSchedWrite sched> {
7478  let Uses = [MXCSR] in
7479  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7480                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7481                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7482                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7483                                         (_Src.VT _Src.RC:$src2)))>,
7484                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7485}
7486
7487// Scalar Conversion with rounding control (RC)
// Defines a single register form, rrb_Int, that takes an extra AVX512RC:$rc
// operand. The pattern matches OpNodeRnd with $rc as an (i32 timm) operand,
// and the record is tagged EVEX_B + EVEX_RC. Only Uses = [MXCSR] is set here.
7488multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7489                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7490                                   X86FoldableSchedWrite sched> {
7491  let Uses = [MXCSR] in
7492  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7493                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7494                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7495                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7496                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7497                        EVEX_4V, VEX_LIG, Sched<[sched]>,
7498                        EVEX_B, EVEX_RC;
7499}
// Wrapper that instantiates the "Z" records for a scalar conversion with a
// rounding-control variant: the regular forms from avx512_cvt_fp_scalar plus
// rrb_Int from avx512_cvt_fp_rc_scalar, all under HasAVX512 and tagged
// VEX_W, EVEX_CD8<64, CD8VT1>, XD.
// Note the argument order: this multiclass takes (_src, _dst) but forwards
// them to the inner multiclasses as (_dst, _src).
7500multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7501                                      SDNode OpNode, SDNode OpNodeRnd,
7502                                      X86FoldableSchedWrite sched,
7503                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7504  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
7505    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7506             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7507                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7508  }
7509}
7510
// Wrapper that instantiates the "Z" records for a scalar conversion with an
// SAE variant: the regular forms from avx512_cvt_fp_scalar plus rrb_Int from
// avx512_cvt_fp_sae_scalar, all under HasAVX512 and tagged
// EVEX_CD8<32, CD8VT1>, XS.
// Note the argument order: this multiclass takes (_src, _dst) but forwards
// them to the inner multiclasses as (_dst, _src).
7511multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7512                                      SDNode OpNode, SDNode OpNodeSAE,
7513                                      X86FoldableSchedWrite sched,
7514                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7515  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
7516    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7517             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7518             EVEX_CD8<32, CD8VT1>, XS;
7519  }
7520}
// Scalar double -> single (with rounding-control form) and single -> double
// (with SAE form) conversions, both on opcode 0x5A.
7521defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7522                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7523                                         f32x_info>;
7524defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7525                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7526                                          f64x_info>;
7527
// Selection patterns for the pattern-less rr/rm (FRC) forms. The first
// instruction operand ($src1) is supplied as IMPLICIT_DEF.
7528def : Pat<(f64 (any_fpextend FR32X:$src)),
7529          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7530          Requires<[HasAVX512]>;
// The load-folding form is only enabled together with OptForSize.
7531def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7532          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7533          Requires<[HasAVX512, OptForSize]>;
7534
7535def : Pat<(f32 (any_fpround FR64X:$src)),
7536          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7537           Requires<[HasAVX512]>;
7538
// Converting element 0 of $src and inserting it into $dst via X86Movss /
// X86Movsd maps onto the two-operand _Int forms, with $dst as the first
// source operand.
7539def : Pat<(v4f32 (X86Movss
7540                   (v4f32 VR128X:$dst),
7541                   (v4f32 (scalar_to_vector
7542                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7543          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7544          Requires<[HasAVX512]>;
7545
7546def : Pat<(v2f64 (X86Movsd
7547                   (v2f64 VR128X:$dst),
7548                   (v2f64 (scalar_to_vector
7549                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7550          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7551          Requires<[HasAVX512]>;
7552
7553//===----------------------------------------------------------------------===//
7554// AVX-512  Vector convert from signed/unsigned integer to float/double
7555//          and from float/double to signed/unsigned integer
7556//===----------------------------------------------------------------------===//
7557
// Generic packed conversion from _Src to _ with register (rr), full-width
// memory (rm) and broadcast memory (rmb) forms.
//
// Each form hands AVX512_maskable_cvt three selection DAGs: the plain
// conversion (using OpNode), a vselect_mask between the conversion and
// _.RC:$src0, and a vselect_mask between the conversion and _.ImmAllZerosV
// (the latter two use MaskOpNode).
//
// Optional arguments:
//   Broadcast  - text appended to "${src}" in the rmb assembly operand
//                (default _.BroadcastStr).
//   Alias      - text appended to OpcodeStr for the rm form only.
//   MemOp      - memory operand of the rm form (default _Src.MemOp).
//   MaskRC     - register class of $mask (default _.KRCWM).
//   LdDAG      - unmasked selection DAG of the rm form; defaults to OpNode
//                applied to _Src.LdFrag.
//   MaskLdDAG  - masked counterpart of LdDAG; defaults to MaskOpNode applied
//                to _Src.LdFrag.
//
// All three forms are defined with Uses = [MXCSR], mayRaiseFPException = 1.
7558multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7559                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7560                          X86FoldableSchedWrite sched,
7561                          string Broadcast = _.BroadcastStr,
7562                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7563                          RegisterClass MaskRC = _.KRCWM,
7564                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7565                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7566let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
7567  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7568                         (ins _Src.RC:$src),
7569                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7570                         (ins MaskRC:$mask, _Src.RC:$src),
7571                          OpcodeStr, "$src", "$src",
7572                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7573                         (vselect_mask MaskRC:$mask,
7574                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7575                                       _.RC:$src0),
7576                         (vselect_mask MaskRC:$mask,
7577                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7578                                       _.ImmAllZerosV)>,
7579                         EVEX, Sched<[sched]>;
7580
  // Memory source; this is the only form whose mnemonic carries Alias and
  // whose patterns come from LdDAG / MaskLdDAG.
7581  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7582                         (ins MemOp:$src),
7583                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7584                         (ins MaskRC:$mask, MemOp:$src),
7585                         OpcodeStr#Alias, "$src", "$src",
7586                         LdDAG,
7587                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7588                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7589                         EVEX, Sched<[sched.Folded]>;
7590
  // Broadcast memory source (EVEX_B): a _Src.ScalarMemOp operand matched
  // through _Src.BroadcastLdFrag.
7591  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7592                         (ins _Src.ScalarMemOp:$src),
7593                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7594                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7595                         OpcodeStr,
7596                         "${src}"#Broadcast, "${src}"#Broadcast,
7597                         (_.VT (OpNode (_Src.VT
7598                                  (_Src.BroadcastLdFrag addr:$src))
7599                            )),
7600                         (vselect_mask MaskRC:$mask,
7601                                       (_.VT
7602                                        (MaskOpNode
7603                                         (_Src.VT
7604                                          (_Src.BroadcastLdFrag addr:$src)))),
7605                                       _.RC:$src0),
7606                         (vselect_mask MaskRC:$mask,
7607                                       (_.VT
7608                                        (MaskOpNode
7609                                         (_Src.VT
7610                                          (_Src.BroadcastLdFrag addr:$src)))),
7611                                       _.ImmAllZerosV)>,
7612                         EVEX, EVEX_B, Sched<[sched.Folded]>;
7613  }
7614}
7615// Conversion with SAE - suppress all exceptions
// Defines a single register form, rrb, written with a {sae} operand and
// encoded with EVEX_B. It is built through AVX512_maskable from OpNodeSAE and
// only sets Uses = [MXCSR].
7616multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7617                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
7618                              X86FoldableSchedWrite sched> {
7619  let Uses = [MXCSR] in
7620  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7621                        (ins _Src.RC:$src), OpcodeStr,
7622                        "{sae}, $src", "$src, {sae}",
7623                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7624                        EVEX, EVEX_B, Sched<[sched]>;
7625}
7626
7627// Conversion with rounding control (RC)
// Defines a single register form, rrb, that takes an extra AVX512RC:$rc
// operand. The pattern matches OpNodeRnd with $rc as an (i32 timm) operand,
// and the record is tagged EVEX_B + EVEX_RC. Only Uses = [MXCSR] is set here.
7628multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7629                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7630                         X86FoldableSchedWrite sched> {
7631  let Uses = [MXCSR] in
7632  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7633                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7634                        "$rc, $src", "$src, $rc",
7635                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7636                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7637}
7638
7639// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// The PatFrag named "extload" # _Src.VTName is passed as both LdDAG and
// MaskLdDAG, so the rm form's patterns do not go through OpNode / MaskOpNode;
// the rr and rmb forms inherited from avx512_vcvt_fp still do.
7640multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7641                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
7642                                SDNode MaskOpNode,
7643                                X86FoldableSchedWrite sched,
7644                                string Broadcast = _.BroadcastStr,
7645                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7646                                RegisterClass MaskRC = _.KRCWM>
7647  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7648                   Alias, MemOp, MaskRC,
7649                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7650                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7651
7652// Extend Float to Double
// Z (HasAVX512): v8f32x -> v8f64, plus the {sae} register form.
// Z128 / Z256 (HasVLX): v4f32x -> v2f64x / v4f64x. The Z128 variant uses the
// X86any_vfpext / X86vfpext nodes, an explicit "{1to2}" broadcast string and
// an f64mem memory operand instead of the defaults.
7653multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7654                           X86SchedWriteWidths sched> {
7655  let Predicates = [HasAVX512] in {
7656    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7657                            any_fpextend, fpextend, sched.ZMM>,
7658             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7659                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7660  }
7661  let Predicates = [HasVLX] in {
7662    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7663                               X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
7664                               "", f64mem>, EVEX_V128;
7665    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
7666                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7667  }
7668}
7669
7670// Truncate Double to Float
// Z (HasAVX512): v8f64 -> v8f32x, plus the rounding-control register form.
// Z128 / Z256 (HasVLX): both produce a v4f32x result, so their rm forms get
// the "{x}" / "{y}" mnemonic suffix and explicit "{1to2}" / "{1to4}"
// broadcast strings. Z128 is instantiated with null_frag for both nodes (no
// selection patterns from this multiclass), f128mem and a VK2WM mask.
7671multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7672  let Predicates = [HasAVX512] in {
7673    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
7674                            X86any_vfpround, X86vfpround, sched.ZMM>,
7675             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7676                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
7677  }
7678  let Predicates = [HasVLX] in {
7679    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7680                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
7681                               f128mem, VK2WM>, EVEX_V128;
7682    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
7683                               X86any_vfpround, X86vfpround,
7684                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7685  }
7686
  // "x"-suffixed mnemonic ("att" variant) for the Z128 register and broadcast
  // forms, in unmasked, masked ({mask}) and zero-masked ({mask} {z}) flavours.
7687  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7688                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7689  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7690                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7691                  VK2WM:$mask, VR128X:$src), 0, "att">;
7692  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7693                  "$dst {${mask}} {z}, $src}",
7694                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7695                  VK2WM:$mask, VR128X:$src), 0, "att">;
7696  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7697                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7698  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7699                  "$dst {${mask}}, ${src}{1to2}}",
7700                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7701                  VK2WM:$mask, f64mem:$src), 0, "att">;
7702  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7703                  "$dst {${mask}} {z}, ${src}{1to2}}",
7704                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7705                  VK2WM:$mask, f64mem:$src), 0, "att">;
7706
  // "y"-suffixed mnemonic ("att" variant) for the Z256 register and broadcast
  // forms, with the same three masking flavours.
7707  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7708                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7709  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7710                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7711                  VK4WM:$mask, VR256X:$src), 0, "att">;
7712  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7713                  "$dst {${mask}} {z}, $src}",
7714                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7715                  VK4WM:$mask, VR256X:$src), 0, "att">;
7716  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7717                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7718  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7719                  "$dst {${mask}}, ${src}{1to4}}",
7720                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7721                  VK4WM:$mask, f64mem:$src), 0, "att">;
7722  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7723                  "$dst {${mask}} {z}, ${src}{1to4}}",
7724                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7725                  VK4WM:$mask, f64mem:$src), 0, "att">;
7726}
7727
// Packed double -> single and single -> double conversions, both on opcode
// 0x5A; they differ in prefix (PD + VEX_W vs. PS) and EVEX_CD8 tuple.
7728defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7729                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
7730defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7731                                  PS, EVEX_CD8<32, CD8VH>;
7732
7733let Predicates = [HasVLX] in {
7734  // Special patterns to allow use of X86vmfpround for masking. Instruction
7735  // patterns have been disabled with null_frag.
  // Register source: unmasked, masked with pass-through $src0, zero-masked.
7736  def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
7737            (VCVTPD2PSZ128rr VR128X:$src)>;
7738  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7739                          VK2WM:$mask),
7740            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7741  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7742                          VK2WM:$mask),
7743            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7744
  // Full-width load source (loadv2f64).
7745  def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
7746            (VCVTPD2PSZ128rm addr:$src)>;
7747  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7748                          VK2WM:$mask),
7749            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7750  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7751                          VK2WM:$mask),
7752            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7753
  // Broadcast load source (X86VBroadcastld64).
7754  def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7755            (VCVTPD2PSZ128rmb addr:$src)>;
7756  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7757                          (v4f32 VR128X:$src0), VK2WM:$mask),
7758            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7759  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7760                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7761            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7762}
7763
7764// Convert Signed/Unsigned Doubleword to Double
// Z (HasAVX512): v8i32x -> v8f64. Z128 / Z256 (HasVLX): v4i32x -> v2f64x /
// v4f64x. The Z128 variant uses separate nodes (OpNode128 / MaskOpNode128),
// an i64mem operand, and custom load DAGs that match a 64-bit scalar load
// (loadi64) placed in a v2i64 and bitcast to v4i32.
// NOTE(review): the enclosing let (Uses = [], mayRaiseFPException = 0) is
// presumably meant to override the Uses = [MXCSR] / mayRaiseFPException = 1
// set inside avx512_vcvt_fp, in line with the "No rounding" remark below -
// confirm against TableGen's let-ordering rules.
7765let Uses = []<Register>, mayRaiseFPException = 0 in
7766multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7767                           SDNode MaskOpNode, SDPatternOperator OpNode128,
7768                           SDNode MaskOpNode128,
7769                           X86SchedWriteWidths sched> {
7770  // No rounding in this op
7771  let Predicates = [HasAVX512] in
7772    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7773                            MaskOpNode, sched.ZMM>, EVEX_V512;
7774
7775  let Predicates = [HasVLX] in {
7776    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7777                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
7778                               "", i64mem, VK2WM,
7779                               (v2f64 (OpNode128 (bc_v4i32
7780                                (v2i64
7781                                 (scalar_to_vector (loadi64 addr:$src)))))),
7782                               (v2f64 (MaskOpNode128 (bc_v4i32
7783                                (v2i64
7784                                 (scalar_to_vector (loadi64 addr:$src))))))>,
7785                               EVEX_V128;
7786    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7787                               MaskOpNode, sched.YMM>, EVEX_V256;
7788  }
7789}
7790
7791// Convert Signed/Unsigned Doubleword to Float
// Z (HasAVX512): v16i32 -> v16f32, plus the rounding-control register form
// selected from OpNodeRnd. Z128 / Z256 (HasVLX): v4i32x -> v4f32x and
// v8i32x -> v8f32x, with all avx512_vcvt_fp defaults.
7792multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7793                           SDNode MaskOpNode, SDNode OpNodeRnd,
7794                           X86SchedWriteWidths sched> {
7795  let Predicates = [HasAVX512] in
7796    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7797                            MaskOpNode, sched.ZMM>,
7798             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7799                               OpNodeRnd, sched.ZMM>, EVEX_V512;
7800
7801  let Predicates = [HasVLX] in {
7802    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7803                               MaskOpNode, sched.XMM>, EVEX_V128;
7804    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7805                               MaskOpNode, sched.YMM>, EVEX_V256;
7806  }
7807}
7808
7809// Convert Float to Signed/Unsigned Doubleword with truncation
// Z (HasAVX512): v16f32 -> v16i32, plus the {sae} register form selected
// from OpNodeSAE. Z128 / Z256 (HasVLX): v4f32x -> v4i32x and
// v8f32x -> v8i32x, with all avx512_vcvt_fp defaults.
7810multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7811                            SDNode MaskOpNode,
7812                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7813  let Predicates = [HasAVX512] in {
7814    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7815                            MaskOpNode, sched.ZMM>,
7816             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7817                                OpNodeSAE, sched.ZMM>, EVEX_V512;
7818  }
7819  let Predicates = [HasVLX] in {
7820    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7821                               MaskOpNode, sched.XMM>, EVEX_V128;
7822    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7823                               MaskOpNode, sched.YMM>, EVEX_V256;
7824  }
7825}
7826
7827// Convert Float to Signed/Unsigned Doubleword
// Z (HasAVX512): v16f32 -> v16i32, plus the rounding-control register form
// selected from OpNodeRnd. Z128 / Z256 (HasVLX): v4f32x -> v4i32x and
// v8f32x -> v8i32x, with all avx512_vcvt_fp defaults.
7828multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7829                           SDNode MaskOpNode, SDNode OpNodeRnd,
7830                           X86SchedWriteWidths sched> {
7831  let Predicates = [HasAVX512] in {
7832    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7833                            MaskOpNode, sched.ZMM>,
7834             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7835                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7836  }
7837  let Predicates = [HasVLX] in {
7838    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7839                               MaskOpNode, sched.XMM>, EVEX_V128;
7840    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7841                               MaskOpNode, sched.YMM>, EVEX_V256;
7842  }
7843}
7844
7845// Convert Double to Signed/Unsigned Doubleword with truncation
// Z (HasAVX512): v8f64 -> v8i32x, plus the {sae} register form selected from
// OpNodeSAE. Z128 / Z256 (HasVLX): v2f64x / v4f64x -> v4i32x; Z128 is
// instantiated with null_frag for both nodes, so this multiclass attaches no
// selection patterns to it.
7846multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7847                            SDNode MaskOpNode, SDNode OpNodeSAE,
7848                            X86SchedWriteWidths sched> {
7849  let Predicates = [HasAVX512] in {
7850    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7851                            MaskOpNode, sched.ZMM>,
7852             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7853                                OpNodeSAE, sched.ZMM>, EVEX_V512;
7854  }
7855  let Predicates = [HasVLX] in {
7856    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7857    // memory forms of these instructions in Asm Parser. They have the same
7858    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7859    // due to the same reason.
7860    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7861                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7862                               VK2WM>, EVEX_V128;
7863    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7864                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7865  }
7866
  // "x"-suffixed mnemonic ("att" variant) for the Z128 register and broadcast
  // forms, in unmasked, masked ({mask}) and zero-masked ({mask} {z}) flavours.
7867  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7868                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7869                  VR128X:$src), 0, "att">;
7870  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7871                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7872                  VK2WM:$mask, VR128X:$src), 0, "att">;
7873  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7874                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7875                  VK2WM:$mask, VR128X:$src), 0, "att">;
7876  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7877                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7878                  f64mem:$src), 0, "att">;
7879  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7880                  "$dst {${mask}}, ${src}{1to2}}",
7881                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7882                  VK2WM:$mask, f64mem:$src), 0, "att">;
7883  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7884                  "$dst {${mask}} {z}, ${src}{1to2}}",
7885                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7886                  VK2WM:$mask, f64mem:$src), 0, "att">;
7887
  // "y"-suffixed mnemonic ("att" variant) for the Z256 register and broadcast
  // forms, with the same three masking flavours.
7888  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7889                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7890                  VR256X:$src), 0, "att">;
7891  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7892                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7893                  VK4WM:$mask, VR256X:$src), 0, "att">;
7894  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7895                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7896                  VK4WM:$mask, VR256X:$src), 0, "att">;
7897  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7898                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7899                  f64mem:$src), 0, "att">;
7900  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7901                  "$dst {${mask}}, ${src}{1to4}}",
7902                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7903                  VK4WM:$mask, f64mem:$src), 0, "att">;
7904  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7905                  "$dst {${mask}} {z}, ${src}{1to4}}",
7906                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7907                  VK4WM:$mask, f64mem:$src), 0, "att">;
7908}
7909
7910// Convert Double to Signed/Unsigned Doubleword
// Z (HasAVX512): v8f64 -> v8i32x, plus the rounding-control register form
// selected from OpNodeRnd. Z128 / Z256 (HasVLX): v2f64x / v4f64x -> v4i32x;
// Z128 is instantiated with null_frag for both nodes, so this multiclass
// attaches no selection patterns to it.
7911multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7912                           SDNode MaskOpNode, SDNode OpNodeRnd,
7913                           X86SchedWriteWidths sched> {
7914  let Predicates = [HasAVX512] in {
7915    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7916                            MaskOpNode, sched.ZMM>,
7917             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7918                               OpNodeRnd, sched.ZMM>, EVEX_V512;
7919  }
7920  let Predicates = [HasVLX] in {
7921    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7922    // memory forms of these instructions in Asm Parser. They have the same
7923    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7924    // due to the same reason.
7925    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7926                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7927                               VK2WM>, EVEX_V128;
7928    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7929                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7930  }
7931
  // "x"-suffixed mnemonic ("att" variant) for the Z128 register and broadcast
  // forms, in unmasked, masked ({mask}) and zero-masked ({mask} {z}) flavours.
7932  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7933                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7934  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7935                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7936                  VK2WM:$mask, VR128X:$src), 0, "att">;
7937  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7938                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7939                  VK2WM:$mask, VR128X:$src), 0, "att">;
7940  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7941                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7942                  f64mem:$src), 0, "att">;
7943  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7944                  "$dst {${mask}}, ${src}{1to2}}",
7945                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7946                  VK2WM:$mask, f64mem:$src), 0, "att">;
7947  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7948                  "$dst {${mask}} {z}, ${src}{1to2}}",
7949                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7950                  VK2WM:$mask, f64mem:$src), 0, "att">;
7951
  // "y"-suffixed mnemonic ("att" variant) for the Z256 register and broadcast
  // forms, with the same three masking flavours.
7952  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7953                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7954  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7955                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7956                  VK4WM:$mask, VR256X:$src), 0, "att">;
7957  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7958                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7959                  VK4WM:$mask, VR256X:$src), 0, "att">;
7960  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7961                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7962                  f64mem:$src), 0, "att">;
7963  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7964                  "$dst {${mask}}, ${src}{1to4}}",
7965                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7966                  VK4WM:$mask, f64mem:$src), 0, "att">;
7967  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7968                  "$dst {${mask}} {z}, ${src}{1to4}}",
7969                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7970                  VK4WM:$mask, f64mem:$src), 0, "att">;
7971}
7972
7973// Convert Double to Signed/Unsigned Quadword
// Emits a 512-bit record group (Z) under HasDQI that combines the plain
// avx512_vcvt_fp forms (OpNode / MaskOpNode) with the avx512_vcvt_fp_rc forms
// built from OpNodeRnd, plus 128-bit (Z128) and 256-bit (Z256) groups under
// HasDQI + HasVLX. opc, OpcodeStr and the per-width member of sched are
// forwarded unchanged to the underlying multiclasses.
7974multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7975                           SDNode MaskOpNode, SDNode OpNodeRnd,
7976                           X86SchedWriteWidths sched> {
7977  let Predicates = [HasDQI] in {
7978    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7979                            MaskOpNode, sched.ZMM>,
7980             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7981                               OpNodeRnd, sched.ZMM>, EVEX_V512;
7982  }
7983  let Predicates = [HasDQI, HasVLX] in {
7984    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7985                               MaskOpNode, sched.XMM>, EVEX_V128;
7986    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7987                               MaskOpNode, sched.YMM>, EVEX_V256;
7988  }
7989}
7990
7991// Convert Double to Signed/Unsigned Quadword with truncation
// Same layout as the non-truncating multiclass, except that the 512-bit group
// is paired with avx512_vcvt_fp_sae (driven by OpNodeRnd) instead of the _rc
// variant, and OpNode is an SDPatternOperator rather than an SDNode.
7992multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7993                            SDNode MaskOpNode, SDNode OpNodeRnd,
7994                            X86SchedWriteWidths sched> {
7995  let Predicates = [HasDQI] in {
7996    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7997                            MaskOpNode, sched.ZMM>,
7998             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7999                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8000  }
8001  let Predicates = [HasDQI, HasVLX] in {
8002    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8003                               MaskOpNode, sched.XMM>, EVEX_V128;
8004    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8005                               MaskOpNode, sched.YMM>, EVEX_V256;
8006  }
8007}
8008
8009// Convert Signed/Unsigned Quadword to Double
// Destination info is the f64 vector type and source info the i64 vector type
// of the same element count. The 512-bit group adds the avx512_vcvt_fp_rc
// forms built from OpNodeRnd. The 128/256-bit groups are additionally tagged
// NotEVEX2VEXConvertible.
8010multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8011                           SDNode MaskOpNode, SDNode OpNodeRnd,
8012                           X86SchedWriteWidths sched> {
8013  let Predicates = [HasDQI] in {
8014    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8015                            MaskOpNode, sched.ZMM>,
8016             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8017                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8018  }
8019  let Predicates = [HasDQI, HasVLX] in {
8020    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8021                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8022    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8023                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8024  }
8025}
8026
8027// Convert Float to Signed/Unsigned Quadword
// The source vector info has f32 elements and the destination i64 elements.
// The 512-bit group pairs avx512_vcvt_fp with avx512_vcvt_fp_rc (OpNodeRnd).
// The 128-bit group overrides the defaults of avx512_vcvt_fp: broadcast
// string "{1to2}", memory operand f64mem, mask class VK2WM, and explicit
// unmasked / masked load dags that match a loadf64 placed in a v2f64 and
// bitcast to v4f32.
8028multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8029                           SDNode MaskOpNode, SDNode OpNodeRnd,
8030                           X86SchedWriteWidths sched> {
8031  let Predicates = [HasDQI] in {
8032    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8033                            MaskOpNode, sched.ZMM>,
8034             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8035                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8036  }
8037  let Predicates = [HasDQI, HasVLX] in {
8038    // Explicitly specified broadcast string, since we take only 2 elements
8039    // from v4f32x_info source
8040    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8041                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8042                               (v2i64 (OpNode (bc_v4f32
8043                                (v2f64
8044                                 (scalar_to_vector (loadf64 addr:$src)))))),
8045                               (v2i64 (MaskOpNode (bc_v4f32
8046                                (v2f64
8047                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8048                               EVEX_V128;
8049    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8050                               MaskOpNode, sched.YMM>, EVEX_V256;
8051  }
8052}
8053
8054// Convert Float to Signed/Unsigned Quadword with truncation
// Mirrors the non-truncating float-to-quadword multiclass, but the 512-bit
// group is paired with avx512_vcvt_fp_sae (OpNodeRnd) and OpNode is an
// SDPatternOperator. The 128-bit group again passes "{1to2}", f64mem, VK2WM
// and explicit load dags built from a loadf64 bitcast to v4f32.
8055multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8056                            SDNode MaskOpNode, SDNode OpNodeRnd,
8057                            X86SchedWriteWidths sched> {
8058  let Predicates = [HasDQI] in {
8059    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8060                            MaskOpNode, sched.ZMM>,
8061             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8062                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8063  }
8064  let Predicates = [HasDQI, HasVLX] in {
8065    // Explicitly specified broadcast string, since we take only 2 elements
8066    // from v4f32x_info source
8067    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8068                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8069                               (v2i64 (OpNode (bc_v4f32
8070                                (v2f64
8071                                 (scalar_to_vector (loadf64 addr:$src)))))),
8072                               (v2i64 (MaskOpNode (bc_v4f32
8073                                (v2f64
8074                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8075                               EVEX_V128;
8076    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8077                               MaskOpNode, sched.YMM>, EVEX_V256;
8078  }
8079}
8080
8081// Convert Signed/Unsigned Quadword to Float
// The 512-bit group pairs avx512_vcvt_fp with avx512_vcvt_fp_rc (OpNodeRnd).
// The 128-bit group is instantiated with null_frag for both pattern operators,
// so it carries no selection patterns of its own, and overrides the broadcast
// string, mnemonic suffix, memory operand (i128mem) and mask class (VK2WM).
// All 128/256-bit records are tagged NotEVEX2VEXConvertible. The InstAlias
// records after the predicate blocks add "x"- and "y"-suffixed spellings for
// the register and broadcast forms in the "att" variant only.
8082multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8083                           SDNode MaskOpNode, SDNode OpNodeRnd,
8084                           X86SchedWriteWidths sched> {
8085  let Predicates = [HasDQI] in {
8086    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8087                            MaskOpNode, sched.ZMM>,
8088             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8089                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8090  }
8091  let Predicates = [HasDQI, HasVLX] in {
8092    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8093    // memory forms of these instructions in Asm Parser. They have the same
8094    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8095    // due to the same reason.
8096    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8097                               null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8098                               EVEX_V128, NotEVEX2VEXConvertible;
8099    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8100                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8101                               NotEVEX2VEXConvertible;
8102  }
8103
  // "x"-suffixed aliases resolving to the Z128 records (rr/rrk/rrkz and
  // rmb/rmbk/rmbkz). NOTE(review): the literal 0 is presumably InstAlias's
  // Emit argument, making these parse-only - confirm against Target.td.
8104  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8105                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8106                  VR128X:$src), 0, "att">;
8107  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8108                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8109                  VK2WM:$mask, VR128X:$src), 0, "att">;
8110  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8111                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8112                  VK2WM:$mask, VR128X:$src), 0, "att">;
8113  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8114                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8115                  i64mem:$src), 0, "att">;
8116  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8117                  "$dst {${mask}}, ${src}{1to2}}",
8118                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8119                  VK2WM:$mask, i64mem:$src), 0, "att">;
8120  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8121                  "$dst {${mask}} {z}, ${src}{1to2}}",
8122                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8123                  VK2WM:$mask, i64mem:$src), 0, "att">;
8124
  // "y"-suffixed aliases resolving to the Z256 records; the destination is
  // still VR128X while the register source is VR256X and the mask is VK4WM.
8125  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8126                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8127                  VR256X:$src), 0, "att">;
8128  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8129                  "$dst {${mask}}, $src}",
8130                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8131                  VK4WM:$mask, VR256X:$src), 0, "att">;
8132  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8133                  "$dst {${mask}} {z}, $src}",
8134                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8135                  VK4WM:$mask, VR256X:$src), 0, "att">;
8136  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8137                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8138                  i64mem:$src), 0, "att">;
8139  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8140                  "$dst {${mask}}, ${src}{1to4}}",
8141                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8142                  VK4WM:$mask, i64mem:$src), 0, "att">;
8143  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8144                  "$dst {${mask}} {z}, ${src}{1to4}}",
8145                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8146                  VK4WM:$mask, i64mem:$src), 0, "att">;
8147}
8148
// Instantiations of the packed conversion multiclasses. Each defm passes the
// opcode byte, the mnemonic string, the selection nodes and the scheduling
// class, and then mixes in the encoding classes: an opcode-prefix class
// (PS / PD / XS / XD), optionally VEX_W, and an EVEX_CD8<element size, form>.

// Doubleword <-> floating-point conversions and their truncating variants.
8149defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8150                                 X86any_VSintToFP, X86VSintToFP,
8151                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8152
8153defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8154                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8155                                PS, EVEX_CD8<32, CD8VF>;
8156
8157defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8158                                 X86cvttp2si, X86cvttp2siSAE,
8159                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8160
8161defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8162                                 X86cvttp2si, X86cvttp2siSAE,
8163                                 SchedWriteCvtPD2DQ>,
8164                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8165
8166defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8167                                 X86cvttp2ui, X86cvttp2uiSAE,
8168                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8169
8170defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8171                                 X86cvttp2ui, X86cvttp2uiSAE,
8172                                 SchedWriteCvtPD2DQ>,
8173                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8174
8175defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8176                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8177                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8178
8179defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8180                                 uint_to_fp, X86VUintToFpRnd,
8181                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8182
8183defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8184                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8185                                 EVEX_CD8<32, CD8VF>;
8186
8187defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8188                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8189                                 VEX_W, EVEX_CD8<64, CD8VF>;
8190
8191defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8192                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8193                                 PS, EVEX_CD8<32, CD8VF>;
8194
8195defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8196                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8197                                 PS, EVEX_CD8<64, CD8VF>;
8198
// Floating-point -> quadword conversions. The PS-source forms use the CD8VH
// form with 32-bit elements; the PD-source forms use CD8VF with 64-bit
// elements and VEX_W.
8199defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8200                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8201                                 PD, EVEX_CD8<64, CD8VF>;
8202
8203defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8204                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8205                                 EVEX_CD8<32, CD8VH>;
8206
8207defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8208                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8209                                 PD, EVEX_CD8<64, CD8VF>;
8210
8211defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8212                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8213                                 EVEX_CD8<32, CD8VH>;
8214
8215defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8216                                 X86cvttp2si, X86cvttp2siSAE,
8217                                 SchedWriteCvtPD2DQ>, VEX_W,
8218                                 PD, EVEX_CD8<64, CD8VF>;
8219
8220defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8221                                 X86cvttp2si, X86cvttp2siSAE,
8222                                 SchedWriteCvtPS2DQ>, PD,
8223                                 EVEX_CD8<32, CD8VH>;
8224
8225defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8226                                 X86cvttp2ui, X86cvttp2uiSAE,
8227                                 SchedWriteCvtPD2DQ>, VEX_W,
8228                                 PD, EVEX_CD8<64, CD8VF>;
8229
8230defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8231                                 X86cvttp2ui, X86cvttp2uiSAE,
8232                                 SchedWriteCvtPS2DQ>, PD,
8233                                 EVEX_CD8<32, CD8VH>;
8234
// Quadword -> floating-point conversions; all four use VEX_W and
// EVEX_CD8<64, CD8VF>.
8235defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8236                            sint_to_fp, X86VSintToFpRnd,
8237                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8238
8239defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8240                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8241                            VEX_W, XS, EVEX_CD8<64, CD8VF>;
8242
8243defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8244                            sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8245                            VEX_W, PS, EVEX_CD8<64, CD8VF>;
8246
8247defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8248                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
8249                            VEX_W, XD, EVEX_CD8<64, CD8VF>;
8250
// Selection patterns for the 128-bit PD -> DQ / UDQ conversions (rounding and
// truncating, signed and unsigned). Each group of nine covers three source
// kinds - register, loadv2f64 and X86VBroadcastld64 - in three flavours:
// unmasked, merge-masked with a $src0 pass-through (rrk/rmk/rmbk) and
// zero-masked with ImmAllZerosV (rrkz/rmkz/rmbkz). The masked forms use the
// dedicated X86m* nodes with a VK2WM mask.
8251let Predicates = [HasVLX] in {
8252  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8253  // patterns have been disabled with null_frag.
8254  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8255            (VCVTPD2DQZ128rr VR128X:$src)>;
8256  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8257                          VK2WM:$mask),
8258            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8259  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8260                          VK2WM:$mask),
8261            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8262
8263  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8264            (VCVTPD2DQZ128rm addr:$src)>;
8265  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8266                          VK2WM:$mask),
8267            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8268  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8269                          VK2WM:$mask),
8270            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8271
8272  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8273            (VCVTPD2DQZ128rmb addr:$src)>;
8274  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8275                          (v4i32 VR128X:$src0), VK2WM:$mask),
8276            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8277  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8278                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8279            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8280
8281  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8282  // patterns have been disabled with null_frag.
8283  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8284            (VCVTTPD2DQZ128rr VR128X:$src)>;
8285  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8286                          VK2WM:$mask),
8287            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8288  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8289                          VK2WM:$mask),
8290            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8291
8292  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8293            (VCVTTPD2DQZ128rm addr:$src)>;
8294  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8295                          VK2WM:$mask),
8296            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8297  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8298                          VK2WM:$mask),
8299            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8300
8301  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8302            (VCVTTPD2DQZ128rmb addr:$src)>;
8303  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8304                          (v4i32 VR128X:$src0), VK2WM:$mask),
8305            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8306  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8307                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8308            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8309
8310  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8311  // patterns have been disabled with null_frag.
8312  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8313            (VCVTPD2UDQZ128rr VR128X:$src)>;
8314  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8315                           VK2WM:$mask),
8316            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8317  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8318                           VK2WM:$mask),
8319            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8320
8321  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8322            (VCVTPD2UDQZ128rm addr:$src)>;
8323  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8324                           VK2WM:$mask),
8325            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8326  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8327                           VK2WM:$mask),
8328            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8329
8330  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8331            (VCVTPD2UDQZ128rmb addr:$src)>;
8332  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8333                           (v4i32 VR128X:$src0), VK2WM:$mask),
8334            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8335  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8336                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8337            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8338
8339  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8340  // patterns have been disabled with null_frag.
8341  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8342            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8343  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8344                          VK2WM:$mask),
8345            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8346  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8347                          VK2WM:$mask),
8348            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8349
8350  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8351            (VCVTTPD2UDQZ128rm addr:$src)>;
8352  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8353                          VK2WM:$mask),
8354            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8355  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8356                          VK2WM:$mask),
8357            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8358
8359  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8360            (VCVTTPD2UDQZ128rmb addr:$src)>;
8361  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8362                          (v4i32 VR128X:$src0), VK2WM:$mask),
8363            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8364  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8365                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8366            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8367}
8368
// Additional load-folding patterns for the 128-bit PS -> QQ / UQQ conversions
// (rounding and truncating). Each group matches a X86vzload64 source that is
// typed v2f64 and bitcast to v4f32, and selects the rm, rmk (merge with
// $src0) or rmkz (merge with ImmAllZerosV) instruction. Masking is expressed
// with vselect_mask over a VK2WM mask; for the truncating conversions the
// unmasked pattern uses the X86any_ node while the masked ones use the plain
// X86cvttp2si / X86cvttp2ui nodes.
8369let Predicates = [HasDQI, HasVLX] in {
8370  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8371            (VCVTPS2QQZ128rm addr:$src)>;
8372  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8373                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8374                                 VR128X:$src0)),
8375            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8376  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8377                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8378                                 v2i64x_info.ImmAllZerosV)),
8379            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8380
8381  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8382            (VCVTPS2UQQZ128rm addr:$src)>;
8383  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8384                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8385                                 VR128X:$src0)),
8386            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8387  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8388                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8389                                 v2i64x_info.ImmAllZerosV)),
8390            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8391
8392  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8393            (VCVTTPS2QQZ128rm addr:$src)>;
8394  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8395                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8396                                 VR128X:$src0)),
8397            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8398  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8399                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8400                                 v2i64x_info.ImmAllZerosV)),
8401            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8402
8403  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8404            (VCVTTPS2UQQZ128rm addr:$src)>;
8405  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8406                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8407                                 VR128X:$src0)),
8408            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8409  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8410                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8411                                 v2i64x_info.ImmAllZerosV)),
8412            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8413}
8414
// Additional load-folding patterns for the 128-bit DQ -> PD and UDQ -> PD
// conversions. Each group matches a X86vzload64 source typed v2i64 and bitcast
// to v4i32, and selects the rm, rmk (vselect_mask with $src0) or rmkz
// (vselect_mask with ImmAllZerosV) instruction. The unmasked pattern uses the
// X86any_ node; the masked ones use X86VSintToFP / X86VUintToFP.
8415let Predicates = [HasVLX] in {
8416  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8417            (VCVTDQ2PDZ128rm addr:$src)>;
8418  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8419                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8420                                 VR128X:$src0)),
8421            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8422  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8423                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8424                                 v2f64x_info.ImmAllZerosV)),
8425            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8426
8427  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8428            (VCVTUDQ2PDZ128rm addr:$src)>;
8429  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8430                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8431                                 VR128X:$src0)),
8432            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8433  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8434                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8435                                 v2f64x_info.ImmAllZerosV)),
8436            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8437}
8438
// Selection patterns for the 128-bit QQ -> PS and UQQ -> PS conversions. Each
// group of nine covers register, loadv2i64 and X86VBroadcastld64 sources in
// unmasked, merge-masked ($src0 pass-through) and zero-masked (ImmAllZerosV)
// flavours; the masked forms use X86VMSintToFP / X86VMUintToFP with a VK2WM
// mask.
8439let Predicates = [HasDQI, HasVLX] in {
8440  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8441  // patterns have been disabled with null_frag.
8442  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8443            (VCVTQQ2PSZ128rr VR128X:$src)>;
8444  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8445                           VK2WM:$mask),
8446            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8447  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8448                           VK2WM:$mask),
8449            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8450
8451  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8452            (VCVTQQ2PSZ128rm addr:$src)>;
8453  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8454                           VK2WM:$mask),
8455            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8456  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8457                           VK2WM:$mask),
8458            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8459
8460  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8461            (VCVTQQ2PSZ128rmb addr:$src)>;
8462  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8463                           (v4f32 VR128X:$src0), VK2WM:$mask),
8464            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8465  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8466                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8467            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8468
8469  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8470  // patterns have been disabled with null_frag.
8471  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8472            (VCVTUQQ2PSZ128rr VR128X:$src)>;
8473  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8474                           VK2WM:$mask),
8475            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8476  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8477                           VK2WM:$mask),
8478            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8479
8480  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8481            (VCVTUQQ2PSZ128rm addr:$src)>;
8482  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8483                           VK2WM:$mask),
8484            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8485  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8486                           VK2WM:$mask),
8487            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8488
8489  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8490            (VCVTUQQ2PSZ128rmb addr:$src)>;
8491  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8492                           (v4f32 VR128X:$src0), VK2WM:$mask),
8493            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8494  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8495                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8496            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8497}
8498
8499//===----------------------------------------------------------------------===//
8500// Half precision conversion instructions
8501//===----------------------------------------------------------------------===//
8502
// avx512_cvtph2ps - Register ("rr") and memory ("rm") forms of vcvtph2ps
// (opcode 0x13), both built with AVX512_maskable_split.
//   _dest    - vector type info of the result.
//   _src     - vector type info of the source.
//   x86memop - memory operand of the rm form.
//   ld_dag   - DAG fragment matched as the memory source of the rm form.
//   sched    - scheduling class; the rm form uses sched.Folded.
// Every record defined here reads MXCSR and is flagged mayRaiseFPException.
8503let Uses = [MXCSR], mayRaiseFPException = 1 in
8504multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8505                           X86MemOperand x86memop, dag ld_dag,
8506                           X86FoldableSchedWrite sched> {
  // Two patterns are supplied per form: one on X86any_cvtph2ps and one on
  // X86cvtph2ps (presumably unmasked vs. masked selection; confirm against
  // the AVX512_maskable_split definition).
8507  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8508                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8509                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8510                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8511                            T8PD, Sched<[sched]>;
8512  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8513                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8514                            (X86any_cvtph2ps (_src.VT ld_dag)),
8515                            (X86cvtph2ps (_src.VT ld_dag))>,
8516                            T8PD, Sched<[sched.Folded]>;
8517}
8518
// avx512_cvtph2ps_sae - Register-only "{sae}" form ("rrb") of vcvtph2ps,
// matched on X86cvtph2psSAE and encoded with EVEX_B. It reads MXCSR but,
// unlike avx512_cvtph2ps, does not set mayRaiseFPException.
8519multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8520                               X86FoldableSchedWrite sched> {
8521  let Uses = [MXCSR] in
8522  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8523                             (ins _src.RC:$src), "vcvtph2ps",
8524                             "{sae}, $src", "$src, {sae}",
8525                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8526                             T8PD, EVEX_B, Sched<[sched]>;
8527}
8528
// 512-bit vcvtph2ps: v16i16 source -> v16f32 result. The memory form takes an
// f256mem operand matched as a plain load; the {sae} form is added on top.
8529let Predicates = [HasAVX512] in
8530  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8531                                    (load addr:$src), WriteCvtPH2PSZ>,
8532                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8533                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8534
// 256-bit and 128-bit vcvtph2ps, available with HasVLX. Neither variant gets
// a {sae} form here.
8535let Predicates = [HasVLX] in {
  // 256-bit: v8i16 source from an f128mem operand, matched as a plain load.
8536  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8537                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8538                       EVEX_CD8<32, CD8VH>;
  // 128-bit: the memory operand is only f64mem, so the load is matched as
  // X86vzload64 producing v2i64, bitconverted to the v8i16 source type.
8539  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8540                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
8541                       WriteCvtPH2PS>, EVEX, EVEX_V128,
8542                       EVEX_CD8<32, CD8VH>;
8543
8544  // Pattern match vcvtph2ps of a scalar i64 load.
  // The i64 load is wrapped by scalar_to_vector (v2i64) and bitconverted to
  // v8i16; it is selected to the same 128-bit memory form.
8545  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8546              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8547            (VCVTPH2PSZ128rm addr:$src)>;
8548}
8549
// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) register and store forms.
//   _dest    - vector type info of the result.
//   _src     - vector type info of the source; its KRCWM is the write-mask
//              class of the register forms.
//   x86memop - destination memory operand of the store forms.
//   RR / MR  - scheduling classes of the register / store forms.
// $src2 is the 8-bit immediate operand (i32u8imm). All records use
// GenericDomain, read MXCSR and are flagged mayRaiseFPException.
8550multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8551                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8552let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Unmasked register form.
8553  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8554             (ins _src.RC:$src1, i32u8imm:$src2),
8555             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8556             [(set _dest.RC:$dst,
8557                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8558             Sched<[RR]>;
  // Merge-masked register form: $src0 is the pass-through value and is tied
  // to $dst.
8559  let Constraints = "$src0 = $dst" in
8560  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8561             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8562             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8563             [(set _dest.RC:$dst,
8564                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8565                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8566             Sched<[RR]>, EVEX_K;
  // Zero-masked register form: the pass-through operand of X86mcvtps2ph is
  // the all-zeros vector.
8567  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8568             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8569             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8570             [(set _dest.RC:$dst,
8571                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8572                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8573             Sched<[RR]>, EVEX_KZ;
  // Store forms carry no selection pattern (empty pattern list), hence the
  // explicit mayStore / hasSideEffects settings.
8574  let hasSideEffects = 0, mayStore = 1 in {
8575    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8576               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8577               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8578               Sched<[MR]>;
    // NOTE(review): the mask class here is _dest.KRCWM whereas rrk/rrkz use
    // _src.KRCWM; confirm whether the difference is intentional.
8579    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8580               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8581               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8582                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8583  }
8584}
8585}
8586
// avx512_cvtps2ph_sae - "{sae}" register form ("rrb") of vcvtps2ph. It is
// built with AVX512_maskable_in_asm and an empty pattern list, so no
// selection pattern is attached here. Reads MXCSR; encoded with EVEX_B.
// Note the template argument is itself named Sched and is what is passed
// inside Sched<[...]>.
8587multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8588                               SchedWrite Sched> {
8589  let hasSideEffects = 0, Uses = [MXCSR] in
8590  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8591                   (outs _dest.RC:$dst),
8592                   (ins _src.RC:$src1, i32u8imm:$src2),
8593                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8594                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8595}
8596
// 512-bit vcvtps2ph: v16f32 source -> v16i16 result, with the {sae} form.
8597let Predicates = [HasAVX512] in {
8598  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8599                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8600                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8601                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8602
  // The mr instruction has no pattern of its own; select it for a store of
  // the conversion result.
8603  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8604            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8605}
8606
// 256-bit and 128-bit vcvtps2ph, available with HasVLX (no {sae} forms).
8607let Predicates = [HasVLX] in {
8608  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8609                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8610                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  // 128-bit: v4f32 source; the store form writes through an f64mem operand.
8611  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8612                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
8613                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8614
  // Fold a store of element 0 of the 128-bit result, viewed either as v2f64
  // or as v2i64, into the 128-bit store form.
8615  def : Pat<(store (f64 (extractelt
8616                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8617                         (iPTR 0))), addr:$dst),
8618            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8619  def : Pat<(store (i64 (extractelt
8620                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8621                         (iPTR 0))), addr:$dst),
8622            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  // Fold a store of the full v8i16 result of a 256-bit source.
8623  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8624            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8625}
8626
8627//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Defines only the register-register "{sae}" form ("rrb"), with no outputs
// and an empty pattern list. The multiclass itself does not list EFLAGS;
// users of it wrap the defm in `let Defs = [EFLAGS]`.
//   opc       - opcode byte.
//   _         - vector type info; supplies the register class of both sources.
//   OpcodeStr - mnemonic.
//   d         - not referenced in the body.
//   sched     - scheduling class (defaults to WriteFComX).
8628multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8629                            string OpcodeStr, Domain d,
8630                            X86FoldableSchedWrite sched = WriteFComX> {
8631  let hasSideEffects = 0, Uses = [MXCSR] in
8632  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8633                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8634                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8635}
8636
// {sae} register forms of the scalar compares: 0x2E for vucomiss/vucomisd and
// 0x2F for vcomiss/vcomisd. The double-precision variants add VEX_W and use
// 64-bit CD8 scaling. All of them define EFLAGS.
8637let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8638  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8639                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8640  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8641                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8642  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8643                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8644  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8645                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8646}
8647
// EVEX-encoded scalar compares that define EFLAGS, instantiated from the
// sse12_ord_cmp / sse12_ord_cmp_int multiclasses (defined outside this
// section).
8648let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Forms on the scalar FP register classes (FR32X / FR64X). The ucomis*
  // variants are matched on X86any_fcmp, the comis* variants on
  // X86strict_fcmps.
8649  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8650                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8651                                 EVEX_CD8<32, CD8VT1>;
8652  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8653                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
8654                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8655  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8656                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8657                                 EVEX_CD8<32, CD8VT1>;
8658  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8659                                 "comisd", SSEPackedDouble>, PD, EVEX,
8660                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Codegen-only forms on the full VR128X register class, matched on the
  // X86ucomi / X86comi nodes with sse_load_f32 / sse_load_f64 memory sources.
8661  let isCodeGenOnly = 1 in {
8662    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8663                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8664                          EVEX_CD8<32, CD8VT1>;
8665    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8666                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8667                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8668
8669    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8670                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8671                          EVEX_CD8<32, CD8VT1>;
8672    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8673                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8674                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8675  }
8676}
8677
8678/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Two-source scalar forms built with AVX512_maskable_scalar: "rr" takes both
// operands in registers, "rm" takes $src2 from memory through
// _.IntScalarMemOp / _.ScalarIntMemFrags. Both require HasAVX512, read MXCSR
// and are encoded EVEX_4V, VEX_LIG.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - SDNode matched by both forms.
//   sched     - scheduling class (rm uses Folded + ReadAfterFold).
//   _         - vector type info.
8679multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8680                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8681  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8682  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8683                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8684                           "$src2, $src1", "$src1, $src2",
8685                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8686                           EVEX_4V, VEX_LIG, Sched<[sched]>;
8687  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8688                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8689                         "$src2, $src1", "$src1, $src2",
8690                         (OpNode (_.VT _.RC:$src1),
8691                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
8692                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8693}
8694}
8695
// Scalar rcp14 (opcode 0x4D) and rsqrt14 (opcode 0x4F) instructions. The
// double-precision variants add VEX_W and use 64-bit CD8 scaling.
8696defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8697                               f32x_info>, EVEX_CD8<32, CD8VT1>,
8698                               T8PD;
8699defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8700                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8701                               T8PD;
8702defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8703                                 SchedWriteFRsqrt.Scl, f32x_info>,
8704                                 EVEX_CD8<32, CD8VT1>, T8PD;
8705defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8706                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8707                                 EVEX_CD8<64, CD8VT1>, T8PD;
8708
8709/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Single-source packed forms built with AVX512_maskable:
//   r  - register source.
//   m  - full-vector memory source (_.LdFrag through bitconvert).
//   mb - broadcast memory source (_.BroadcastLdFrag); EVEX_B is set and the
//        assembly operand gets _.BroadcastStr appended.
// Memory forms are scheduled with sched.Folded + sched.ReadAfterFold.
8710multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8711                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8712  let ExeDomain = _.ExeDomain in {
8713  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8714                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8715                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8716                         Sched<[sched]>;
8717  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8718                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8719                         (OpNode (_.VT
8720                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8721                         Sched<[sched.Folded, sched.ReadAfterFold]>;
8722  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8723                          (ins _.ScalarMemOp:$src), OpcodeStr,
8724                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8725                          (OpNode (_.VT
8726                            (_.BroadcastLdFrag addr:$src)))>,
8727                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8728  }
8729}
8730
// avx512_fp14_p_vl_all - Instantiates avx512_fp14_p for every vector length
// and both element types: "ps"/"pd" is appended to OpcodeStr, 512-bit forms
// are unconditional here, 128/256-bit forms require HasVLX. "pd" variants
// add VEX_W and use 64-bit CD8 scaling. Everything reads MXCSR.
8731let Uses = [MXCSR] in
8732multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8733                                X86SchedWriteWidths sched> {
8734  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8735                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8736  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8737                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8738
8739  // Define only if AVX512VL feature is present.
8740  let Predicates = [HasVLX] in {
8741    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8742                                OpNode, sched.XMM, v4f32x_info>,
8743                               EVEX_V128, EVEX_CD8<32, CD8VF>;
8744    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8745                                OpNode, sched.YMM, v8f32x_info>,
8746                               EVEX_V256, EVEX_CD8<32, CD8VF>;
8747    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8748                                OpNode, sched.XMM, v2f64x_info>,
8749                               EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8750    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8751                                OpNode, sched.YMM, v4f64x_info>,
8752                               EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8753  }
8754}
8755
// Packed rsqrt14 (opcode 0x4E) and rcp14 (opcode 0x4C) for all vector lengths.
8756defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8757defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8758
8759/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Two-source scalar forms built with AVX512_maskable_scalar:
//   r  - register form matched on OpNode (tagged SIMD_EXC).
//   rb - register "{sae}" form matched on OpNodeSAE; EVEX_B, no SIMD_EXC.
//   m  - memory form matched on OpNode with a _.ScalarIntMemFrags source
//        (tagged SIMD_EXC).
// All three read MXCSR.
8760multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8761                         SDNode OpNode, SDNode OpNodeSAE,
8762                         X86FoldableSchedWrite sched> {
8763  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8764  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8765                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8766                           "$src2, $src1", "$src1, $src2",
8767                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8768                           Sched<[sched]>, SIMD_EXC;
8769
8770  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8771                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8772                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8773                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8774                            EVEX_B, Sched<[sched]>;
8775
8776  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8777                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8778                         "$src2, $src1", "$src1, $src2",
8779                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
8780                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8781  }
8782}
8783
// avx512_eri_s - Instantiates avx512_fp28_s for single ("ss", f32x_info) and
// double ("sd", f64x_info, plus VEX_W) precision. No predicate is set here;
// users supply it where needed.
8784multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8785                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8786  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8787                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8788  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8789                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8790}
8791
// Scalar rcp28 (0xCB) and rsqrt28 (0xCD) are gated on HasERI.
8792let Predicates = [HasERI] in {
8793  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8794                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8795  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8796                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8797}
8798
// Scalar getexp (0x43) reuses the same multiclass but sits outside the
// HasERI block, so it is not gated on that predicate here.
8799defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8800                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8801/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Single-source packed forms built with AVX512_maskable:
//   r  - register source.
//   m  - full-vector memory source (_.LdFrag through bitconvert).
//   mb - broadcast memory source (_.BroadcastLdFrag) with EVEX_B.
// All forms read MXCSR and are flagged mayRaiseFPException. The EVEX prefix
// class and opcode map are not applied here; the users of this multiclass
// add them.
8802
8803multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8804                         SDNode OpNode, X86FoldableSchedWrite sched> {
8805  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8806  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8807                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8808                         (OpNode (_.VT _.RC:$src))>,
8809                         Sched<[sched]>;
8810
8811  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8812                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8813                         (OpNode (_.VT
8814                             (bitconvert (_.LdFrag addr:$src))))>,
8815                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8816
8817  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8818                         (ins _.ScalarMemOp:$src), OpcodeStr,
8819                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8820                         (OpNode (_.VT
8821                                  (_.BroadcastLdFrag addr:$src)))>,
8822                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8823  }
8824}
// avx512_fp28_p_sae - Register-only "{sae}" packed form ("rb"), matched on
// the OpNode passed in (callers pass the SAE variant of the node). Reads
// MXCSR, sets EVEX_B, and does not set mayRaiseFPException.
8825multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8826                         SDNode OpNode, X86FoldableSchedWrite sched> {
8827  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8828  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8829                        (ins _.RC:$src), OpcodeStr,
8830                        "{sae}, $src", "$src, {sae}",
8831                        (OpNode (_.VT _.RC:$src))>,
8832                        EVEX_B, Sched<[sched]>;
8833}
8834
// avx512_eri - 512-bit only instantiation: for each of "ps" (v16f32) and
// "pd" (v8f64, plus VEX_W) it combines the regular forms (avx512_fp28_p on
// OpNode) with the {sae} form (avx512_fp28_p_sae on OpNodeSAE).
8835multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8836                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8837   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8838              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8839              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8840   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8841              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8842              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8843}
8844
// avx512_fp_unaryop_packed - 128-bit and 256-bit instantiations of
// avx512_fp28_p for "ps" and "pd" (no {sae} forms), all gated on HasVLX.
// It complements avx512_eri, which only provides the 512-bit forms.
8845multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8846                                  SDNode OpNode, X86SchedWriteWidths sched> {
8847  // Define only if AVX512VL feature is present.
8848  let Predicates = [HasVLX] in {
8849    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8850                                sched.XMM>,
8851                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8852    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8853                                sched.YMM>,
8854                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8855    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8856                                sched.XMM>,
8857                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8858    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8859                                sched.YMM>,
8860                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8861  }
8862}
8863
// Packed rsqrt28 (0xCC), rcp28 (0xCA) and exp2 (0xC8): 512-bit only and
// gated on HasERI.
8864let Predicates = [HasERI] in {
8865 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8866                            SchedWriteFRsqrt>, EVEX;
8867 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8868                            SchedWriteFRcp>, EVEX;
8869 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8870                            SchedWriteFAdd>, EVEX;
8871}
// Packed getexp (0x42) sits outside the HasERI block and also gets the
// 128/256-bit forms from avx512_fp_unaryop_packed.
8872defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8873                            SchedWriteFRnd>,
8874                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8875                                          SchedWriteFRnd>, EVEX;
8876
// avx512_sqrt_packed_round - Register form ("rb") of packed sqrt with an
// explicit rounding-control operand $rc (AVX512RC), matched on X86fsqrtRnd
// and encoded with EVEX_B + EVEX_RC.
8877multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8878                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8879  let ExeDomain = _.ExeDomain in
8880  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8881                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8882                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8883                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8884}
8885
// avx512_sqrt_packed - Packed sqrt forms built with AVX512_maskable_split:
//   r  - register source.
//   m  - full-vector memory source (_.LdFrag).
//   mb - broadcast memory source (_.BroadcastLdFrag) with EVEX_B.
// Each form supplies two patterns, one on any_fsqrt and one on fsqrt.
// All forms read MXCSR and are flagged mayRaiseFPException.
8886multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8887                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8888  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8889  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
8890                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8891                         (_.VT (any_fsqrt _.RC:$src)),
8892                         (_.VT (fsqrt _.RC:$src))>, EVEX,
8893                         Sched<[sched]>;
8894  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8895                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8896                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
8897                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
8898                         Sched<[sched.Folded, sched.ReadAfterFold]>;
8899  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8900                          (ins _.ScalarMemOp:$src), OpcodeStr,
8901                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8902                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
8903                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
8904                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8905  }
8906}
8907
// avx512_sqrt_packed_all - Instantiates avx512_sqrt_packed for "ps"/"pd" at
// every vector length; 128/256-bit forms require HasVLX. "pd" variants add
// VEX_W and use 64-bit CD8 scaling.
// NOTE(review): the outer `let` repeats the Uses / mayRaiseFPException
// settings that avx512_sqrt_packed already applies to its own records.
8908let Uses = [MXCSR], mayRaiseFPException = 1 in
8909multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8910                                  X86SchedWriteSizes sched> {
8911  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8912                                sched.PS.ZMM, v16f32_info>,
8913                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8914  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8915                                sched.PD.ZMM, v8f64_info>,
8916                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8917  // Define only if AVX512VL feature is present.
8918  let Predicates = [HasVLX] in {
8919    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8920                                     sched.PS.XMM, v4f32x_info>,
8921                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8922    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8923                                     sched.PS.YMM, v8f32x_info>,
8924                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8925    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8926                                     sched.PD.XMM, v2f64x_info>,
8927                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8928    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8929                                     sched.PD.YMM, v4f64x_info>,
8930                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8931  }
8932}
8933
// avx512_sqrt_packed_all_round - Rounding-control forms of packed sqrt,
// instantiated for the 512-bit "ps" and "pd" variants only. Reads MXCSR.
8934let Uses = [MXCSR] in
8935multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8936                                        X86SchedWriteSizes sched> {
8937  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8938                                      sched.PS.ZMM, v16f32_info>,
8939                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8940  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8941                                      sched.PD.ZMM, v8f64_info>,
8942                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8943}
8944
// avx512_sqrt_scalar - Scalar sqrt instructions and selection patterns.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   sched     - scheduling class.
//   _         - vector type info (RC for the *_Int forms, FRC for r/m).
//   Name      - record-name prefix; Name#"Zr" / Name#"Zm" are looked up with
//               !cast to reach the r / m records in the patterns below.
8945multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8946                              X86VectorVTInfo _, string Name> {
8947  let ExeDomain = _.ExeDomain in {
    // Maskable forms on the vector register class, matched on X86fsqrts.
8948    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8949                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8950                         "$src2, $src1", "$src1, $src2",
8951                         (X86fsqrts (_.VT _.RC:$src1),
8952                                    (_.VT _.RC:$src2))>,
8953                         Sched<[sched]>, SIMD_EXC;
8954    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8955                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8956                         "$src2, $src1", "$src1, $src2",
8957                         (X86fsqrts (_.VT _.RC:$src1),
8958                                    (_.ScalarIntMemFrags addr:$src2))>,
8959                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    // Rounding-control form: extra $rc operand, matched on X86fsqrtRnds.
    // Reads MXCSR but carries no SIMD_EXC.
8960    let Uses = [MXCSR] in
8961    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8962                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8963                         "$rc, $src2, $src1", "$src1, $src2, $rc",
8964                         (X86fsqrtRnds (_.VT _.RC:$src1),
8965                                     (_.VT _.RC:$src2),
8966                                     (i32 timm:$rc))>,
8967                         EVEX_B, EVEX_RC, Sched<[sched]>;
8968
    // Codegen-only forms on the scalar register class (_.FRC). They have no
    // patterns of their own; the Pat records below select them.
8969    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8970      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8971                (ins _.FRC:$src1, _.FRC:$src2),
8972                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8973                Sched<[sched]>, SIMD_EXC;
8974      let mayLoad = 1 in
8975        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8976                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8977                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8978                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8979    }
8980  }
8981
  // Plain scalar sqrt of a register: the first source operand is filled
  // with IMPLICIT_DEF and the value goes in as the second source.
8982  let Predicates = [HasAVX512] in {
8983    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
8984              (!cast<Instruction>(Name#Zr)
8985                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
8986  }
8987
  // Folding the load into the instruction is only done under OptForSize.
8988  let Predicates = [HasAVX512, OptForSize] in {
8989    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
8990              (!cast<Instruction>(Name#Zm)
8991                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
8992  }
8993}
8994
// avx512_sqrt_scalar_all - Instantiates avx512_sqrt_scalar for single ("ss",
// XS prefix) and double ("sd", XD prefix, VEX_W) precision. NAME#"SS" /
// NAME#"SD" is passed down as the Name prefix used by the inner patterns.
8995multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8996                                  X86SchedWriteSizes sched> {
8997  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8998                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8999  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9000                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9001}
9002
// vsqrt (opcode 0x51): packed forms for all vector lengths plus the 512-bit
// rounding-control forms, followed by the scalar forms (VEX_LIG).
9003defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9004             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9005
9006defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9007
// avx512_rndscale_scalar - Scalar rndscale instructions and patterns.
// $src3 is the 8-bit immediate operand (i32u8imm) of every form.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   sched     - scheduling class.
//   _         - vector type info (RC for the *_Int forms, FRC for r/m).
9008multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9009                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9010  let ExeDomain = _.ExeDomain in {
  // Maskable register form on the vector register class (X86RndScales).
9011  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9012                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9013                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9014                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9015                           (i32 timm:$src3)))>,
9016                           Sched<[sched]>, SIMD_EXC;
9017
  // {sae} register form (X86RndScalesSAE): EVEX_B, reads MXCSR, no SIMD_EXC.
9018  let Uses = [MXCSR] in
9019  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9020                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9021                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9022                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9023                         (i32 timm:$src3)))>, EVEX_B,
9024                         Sched<[sched]>;
9025
  // Maskable memory form; $src2 comes from _.ScalarIntMemFrags.
9026  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9027                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9028                         OpcodeStr,
9029                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9030                         (_.VT (X86RndScales _.RC:$src1,
9031                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9032                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9033
  // Codegen-only forms on the scalar register class (_.FRC), without
  // patterns of their own; the Pat records below select them.
9034  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9035    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9036               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9037               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9038               []>, Sched<[sched]>, SIMD_EXC;
9039
9040    let mayLoad = 1 in
9041      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9042                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9043                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9044                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9045  }
9046  }
9047
  // X86any_VRndScale on a scalar register: the first source operand is
  // filled with IMPLICIT_DEF and the value goes in as the second source.
9048  let Predicates = [HasAVX512] in {
9049    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9050              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9051               _.FRC:$src1, timm:$src2))>;
9052  }
9053
  // Folding the scalar load into the instruction is only done under
  // OptForSize.
9054  let Predicates = [HasAVX512, OptForSize] in {
9055    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9056              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9057               addr:$src1, timm:$src2))>;
9058  }
9059}
9060
// Instantiate the scalar rndscale multiclass for single precision (opcode
// 0x0A, f32x_info, 32-bit CD8 element) and for double precision (opcode 0x0B,
// f64x_info, 64-bit CD8 element, additionally tagged VEX_W). Both use the
// SchedWriteFRnd.Scl scheduling class.
9061defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9062                                           SchedWriteFRnd.Scl, f32x_info>,
9063                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
9064                                           EVEX_CD8<32, CD8VT1>;
9065
9066defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9067                                           SchedWriteFRnd.Scl, f64x_info>,
9068                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9069                                           EVEX_CD8<64, CD8VT1>;
9070
// Selection patterns that map a masked scalar operation onto the r_Intk
// (merge-masking) and r_Intkz (zero-masking) forms of an instruction.
//   OpNode        - scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction name without the leading "V"; "r_Intk" or
//                   "r_Intkz" is appended to form the selected instruction.
//   Move          - node that merges the computed scalar into $src1.
//   Mask          - dag matched as the condition of X86selects_mask.
//   _             - vector type info supplying the VT of all vector operands.
//   ZeroFP        - leaf matched as the false value in the zero-masking case.
//   OutMask       - dag emitted as the mask operand of the instruction.
//   BasePredicate - predicate guarding both patterns.
9071multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9072                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9073                                dag OutMask, Predicate BasePredicate> {
9074  let Predicates = [BasePredicate] in {
    // Merge-masking: the false value of the select is element 0 of $dst, and
    // $dst is passed as the first operand of the r_Intk instruction.
9075    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9076               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9077               (extractelt _.VT:$dst, (iPTR 0))))),
9078              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9079               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9080
    // Zero-masking: the false value of the select is ZeroFP, so no pass-through
    // operand is given to the r_Intkz instruction.
9081    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9082               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9083               ZeroFP))),
9084              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9085               OutMask, _.VT:$src2, _.VT:$src1)>;
9086  }
9087}
9088
// Masked scalar fsqrt patterns for f32 (X86Movss, v4f32x_info, fp32imm0) and
// f64 (X86Movsd, v2f64x_info, fp64imm0). The matched mask is a GR32 value
// truncated to i8 and turned into a v1i1; it is handed to the instruction
// after a COPY_TO_REGCLASS into VK1WM. Both are guarded by HasAVX512.
9089defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9090                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9091                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9092defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9093                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9094                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9095
9096
9097//-------------------------------------------------
9098// Integer truncate and extend operations
9099//-------------------------------------------------
9100
9101// PatFrags that contain a select and a truncate op. They take operands in the
9102// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9103// either to the multiclasses.
// Each fragment takes ($src, $src0, $mask) and expands to a vselect_mask that
// picks the truncated $src where $mask is set and $src0 otherwise.
// select_trunc wraps the plain trunc node.
9104def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9105                           (vselect_mask node:$mask,
9106                                         (trunc node:$src), node:$src0)>;
// select_truncs wraps X86vtruncs.
9107def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9108                            (vselect_mask node:$mask,
9109                                          (X86vtruncs node:$src), node:$src0)>;
// select_truncus wraps X86vtruncus.
9110def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9111                             (vselect_mask node:$mask,
9112                                           (X86vtruncus node:$src), node:$src0)>;
9113
// Defines the five forms of one truncate instruction for a single vector
// width: rr, rrk, rrkz (register destination) and mr, mrk (memory
// destination).
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - node matched by the unmasked rr pattern.
//   MaskNode       - operator matched by rrk/rrkz; it is applied to
//                    (source, pass-through, mask) in that order.
//   sched          - scheduling class; sched.Folded is used for the stores.
//   SrcInfo        - type info of the source; also supplies the mask class.
//   DestInfo       - type info of the destination and the execution domain.
//   x86memop       - memory operand used by the store forms.
9114multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9115                              SDPatternOperator MaskNode,
9116                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9117                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9118  let ExeDomain = DestInfo.ExeDomain in {
  // Unmasked register-to-register truncate.
9119  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9120             (ins SrcInfo.RC:$src),
9121             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9122             [(set DestInfo.RC:$dst,
9123                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9124             EVEX, Sched<[sched]>;
  // Merge-masked form: $src0 is the pass-through value and is tied to $dst.
9125  let Constraints = "$src0 = $dst" in
9126  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9127             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9128             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9129             [(set DestInfo.RC:$dst,
9130                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9131                             (DestInfo.VT DestInfo.RC:$src0),
9132                             SrcInfo.KRCWM:$mask))]>,
9133             EVEX, EVEX_K, Sched<[sched]>;
  // Zero-masked form: the pass-through operand of MaskNode is an all-zeros
  // vector, so there is no $src0 input.
9134  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9135             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9136             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9137             [(set DestInfo.RC:$dst,
9138                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9139                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9140             EVEX, EVEX_KZ, Sched<[sched]>;
9141  }
9142
  // Truncating stores. Both are declared with empty pattern lists, so
  // mayStore/hasSideEffects are set explicitly here.
9143  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9144    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9145               (ins x86memop:$dst, SrcInfo.RC:$src),
9146               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9147               EVEX, Sched<[sched.Folded]>;
9148
    // Masked truncating store; marked NotMemoryFoldable.
9149    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9150               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9151               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9152               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9153  }//mayStore = 1, hasSideEffects = 0
9154}
9155
// Selection patterns for the truncating-store instructions.
//   SrcInfo    - type info of the stored source; its ZSuffix selects the
//                instruction width and its KRCWM class types the mask.
//   DestInfo   - destination type info (not referenced by these patterns).
//   truncFrag  - fragment matched for the unmasked store (source, address).
//   mtruncFrag - fragment matched for the masked store (source, address, mask).
//   Name       - instruction base name; Name # ZSuffix # "mr"/"mrk" is used.
9156multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9157                                    X86VectorVTInfo DestInfo,
9158                                    PatFrag truncFrag, PatFrag mtruncFrag,
9159                                    string Name> {
9160
  // Unmasked truncating store -> <Name><ZSuffix>mr.
9161  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9162            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9163                                    addr:$dst, SrcInfo.RC:$src)>;
9164
  // Masked truncating store -> <Name><ZSuffix>mrk.
9165  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9166                        SrcInfo.KRCWM:$mask),
9167            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9168                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9169}
9170
// Instantiates a truncate instruction at 128-, 256- and 512-bit source widths.
// For each width it combines avx512_trunc_common (the instruction defs) with
// avx512_trunc_mr_lowering (the store patterns), passing NAME as the
// instruction base name.
//   OpNode128/256/512     - unmasked node used for each width.
//   MaskNode128/256/512   - masked operator used for each width.
//   VTSrcInfo             - source type info; info128/info256/info512 are used.
//   DestInfoZ128/Z256/Z   - destination type info for each width.
//   x86memopZ128/Z256/Z   - store memory operand for each width.
//   truncFrag, mtruncFrag - unmasked / masked truncating-store fragments.
//   prd                   - base predicate, HasAVX512 by default.
9171multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9172                        SDNode OpNode256, SDNode OpNode512,
9173                        SDPatternOperator MaskNode128,
9174                        SDPatternOperator MaskNode256,
9175                        SDPatternOperator MaskNode512,
9176                        X86FoldableSchedWrite sched,
9177                        AVX512VLVectorVTInfo VTSrcInfo,
9178                        X86VectorVTInfo DestInfoZ128,
9179                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9180                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9181                        X86MemOperand x86memopZ, PatFrag truncFrag,
9182                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9183
  // The 128- and 256-bit variants additionally require HasVLX.
9184  let Predicates = [HasVLX, prd] in {
9185    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9186                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9187                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9188                             truncFrag, mtruncFrag, NAME>, EVEX_V128;
9189
9190    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9191                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9192                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9193                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
9194  }
  // The 512-bit variant only requires the base predicate.
9195  let Predicates = [prd] in
9196    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9197                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9198                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9199                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
9200}
9201
// i64 -> i8 truncate wrapper around avx512_trunc. The destination is
// v16i8x_info at every width, and InVecNode / InVecMaskNode are used for all
// three widths; the OpNode and MaskNode parameters are accepted but not
// forwarded. The store operands are i16mem, i32mem and i64mem for the
// 128-, 256- and 512-bit forms.
9202multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9203                           SDPatternOperator MaskNode,
9204                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9205                           PatFrag MaskedStoreNode, SDNode InVecNode,
9206                           SDPatternOperator InVecMaskNode> {
9207  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9208                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9209                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9210                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9211                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9212}
9213
// i64 -> i16 truncate wrapper around avx512_trunc. The destination is
// v8i16x_info at every width. The 128- and 256-bit forms use InVecNode /
// InVecMaskNode; the 512-bit form uses OpNode / MaskNode. The store operands
// are i32mem, i64mem and i128mem.
9214multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9215                           SDPatternOperator MaskNode,
9216                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9217                           PatFrag MaskedStoreNode, SDNode InVecNode,
9218                           SDPatternOperator InVecMaskNode> {
9219  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9220                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9221                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9222                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9223                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9224}
9225
// i64 -> i32 truncate wrapper around avx512_trunc. The destination is
// v4i32x_info for the 128- and 256-bit forms and v8i32x_info for the 512-bit
// form. Only the 128-bit form uses InVecNode / InVecMaskNode; the wider forms
// use OpNode / MaskNode. The store operands are i64mem, i128mem and i256mem.
9226multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9227                           SDPatternOperator MaskNode,
9228                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9229                           PatFrag MaskedStoreNode, SDNode InVecNode,
9230                           SDPatternOperator InVecMaskNode> {
9231  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9232                          InVecMaskNode, MaskNode, MaskNode, sched,
9233                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9234                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9235                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9236}
9237
// i32 -> i8 truncate wrapper around avx512_trunc. The destination is
// v16i8x_info at every width. The 128- and 256-bit forms use InVecNode /
// InVecMaskNode; the 512-bit form uses OpNode / MaskNode. The store operands
// are i32mem, i64mem and i128mem.
9238multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9239                           SDPatternOperator MaskNode,
9240                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9241                           PatFrag MaskedStoreNode, SDNode InVecNode,
9242                           SDPatternOperator InVecMaskNode> {
9243  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9244                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9245                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9246                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9247                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9248}
9249
// i32 -> i16 truncate wrapper around avx512_trunc. The destination is
// v8i16x_info for the 128- and 256-bit forms and v16i16x_info for the 512-bit
// form. Only the 128-bit form uses InVecNode / InVecMaskNode; the wider forms
// use OpNode / MaskNode. The store operands are i64mem, i128mem and i256mem.
9250multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9251                           SDPatternOperator MaskNode,
9252                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9253                           PatFrag MaskedStoreNode, SDNode InVecNode,
9254                           SDPatternOperator InVecMaskNode> {
9255  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9256                          InVecMaskNode, MaskNode, MaskNode, sched,
9257                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9258                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9259                          MaskedStoreNode, HasAVX512>, EVEX_CD8<16, CD8VH>;
9260}
9261
// i16 -> i8 truncate wrapper around avx512_trunc. The destination is
// v16i8x_info for the 128- and 256-bit forms and v32i8x_info for the 512-bit
// form. Only the 128-bit form uses InVecNode / InVecMaskNode; the wider forms
// use OpNode / MaskNode. The store operands are i64mem, i128mem and i256mem.
// Unlike the other wrappers this one overrides the base predicate with HasBWI.
// NOTE(review): the sibling wrappers pass the destination element width to
// EVEX_CD8 (8 for qb/db, 16 for qw/dw, 32 for qd); here the destination is i8
// but 16 is passed -- confirm this is intentional.
9262multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9263                           SDPatternOperator MaskNode,
9264                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9265                           PatFrag MaskedStoreNode, SDNode InVecNode,
9266                           SDPatternOperator InVecMaskNode> {
9267  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9268                          InVecMaskNode, MaskNode, MaskNode, sched,
9269                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9270                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9271                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9272}
9273
// Truncate instruction instantiations. Each source/destination pair comes in
// three flavours: plain (trunc / select_trunc / X86vtrunc / X86vmtrunc),
// "S" (X86vtruncs / select_truncs / X86vmtruncs) and "US" (X86vtruncus /
// select_truncus / X86vmtruncus), each with its matching truncating-store
// fragments. All use the WriteShuffle256 scheduling class.

// i64 -> i8.
9274defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
9275                                  WriteShuffle256, truncstorevi8,
9276                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9277defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
9278                                  WriteShuffle256, truncstore_s_vi8,
9279                                  masked_truncstore_s_vi8, X86vtruncs,
9280                                  X86vmtruncs>;
9281defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9282                                  select_truncus, WriteShuffle256,
9283                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9284                                  X86vtruncus, X86vmtruncus>;
9285
// i64 -> i16.
9286defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9287                                  WriteShuffle256, truncstorevi16,
9288                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9289defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9290                                  WriteShuffle256, truncstore_s_vi16,
9291                                  masked_truncstore_s_vi16, X86vtruncs,
9292                                  X86vmtruncs>;
9293defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9294                                  select_truncus, WriteShuffle256,
9295                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9296                                  X86vtruncus, X86vmtruncus>;
9297
// i64 -> i32.
9298defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9299                                  WriteShuffle256, truncstorevi32,
9300                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9301defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9302                                  WriteShuffle256, truncstore_s_vi32,
9303                                  masked_truncstore_s_vi32, X86vtruncs,
9304                                  X86vmtruncs>;
9305defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9306                                  select_truncus, WriteShuffle256,
9307                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9308                                  X86vtruncus, X86vmtruncus>;
9309
// i32 -> i8.
9310defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9311                                  WriteShuffle256, truncstorevi8,
9312                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9313defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9314                                  WriteShuffle256, truncstore_s_vi8,
9315                                  masked_truncstore_s_vi8, X86vtruncs,
9316                                  X86vmtruncs>;
9317defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9318                                  select_truncus, WriteShuffle256,
9319                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9320                                  X86vtruncus, X86vmtruncus>;
9321
// i32 -> i16.
9322defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9323                                  WriteShuffle256, truncstorevi16,
9324                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9325defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9326                                  WriteShuffle256, truncstore_s_vi16,
9327                                  masked_truncstore_s_vi16, X86vtruncs,
9328                                  X86vmtruncs>;
9329defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9330                                  select_truncus, WriteShuffle256,
9331                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9332                                  X86vtruncus, X86vmtruncus>;
9333
// i16 -> i8.
9334defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9335                                  WriteShuffle256, truncstorevi8,
9336                                  masked_truncstorevi8, X86vtrunc,
9337                                  X86vmtrunc>;
9338defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9339                                  WriteShuffle256, truncstore_s_vi8,
9340                                  masked_truncstore_s_vi8, X86vtruncs,
9341                                  X86vmtruncs>;
9342defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9343                                  select_truncus, WriteShuffle256,
9344                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9345                                  X86vtruncus, X86vmtruncus>;
9346
// Truncates of 256-bit sources when VLX is not available: the source is
// inserted into the low ymm of an otherwise undefined (IMPLICIT_DEF) 512-bit
// register, the 512-bit Zrr instruction is used, and the low xmm of its result
// is extracted.
9347let Predicates = [HasAVX512, NoVLX] in {
// v8i32 -> v8i16 via VPMOVDWZrr.
9348def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9349         (v8i16 (EXTRACT_SUBREG
9350                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9351                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
// v4i64 -> v4i32 via VPMOVQDZrr.
9352def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9353         (v4i32 (EXTRACT_SUBREG
9354                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9355                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9356}
9357
// Same technique for v16i16 -> v16i8, which needs BWI for VPMOVWBZrr.
9358let Predicates = [HasBWI, NoVLX] in {
9359def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9360         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9361                                            VR256X:$src, sub_ymm))), sub_xmm))>;
9362}
9363
9364// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Patterns that select the rrk / rrkz forms of InstrName directly from a
// masked-truncate node.
//   InstrName - instruction name up to (not including) the "rrk"/"rrkz" suffix.
//   OpNode    - masked truncate node taking (source, pass-through, mask).
//   DestInfo  - type info of the truncated result.
//   SrcInfo   - type info of the source; its KRCWM class types the mask.
9365multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9366                           X86VectorVTInfo DestInfo,
9367                           X86VectorVTInfo SrcInfo> {
  // Pass-through in a register -> merge-masked rrk form.
9368  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9369                                 DestInfo.RC:$src0,
9370                                 SrcInfo.KRCWM:$mask)),
9371            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9372                                                 SrcInfo.KRCWM:$mask,
9373                                                 SrcInfo.RC:$src)>;
9374
  // All-zeros pass-through -> zero-masked rrkz form.
9375  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9376                                 DestInfo.ImmAllZerosV,
9377                                 SrcInfo.KRCWM:$mask)),
9378            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9379                                                  SrcInfo.RC:$src)>;
9380}
9381
// Masked-truncate lowering for the 256-bit i32 -> i16 instructions (plain,
// "S" and "US" flavours); requires VLX.
9382let Predicates = [HasVLX] in {
9383defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9384defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9385defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9386}
9387
// Masked-truncate lowering for the 512-bit instructions, three flavours each.
9388let Predicates = [HasAVX512] in {
// v16i32 -> v16i16.
9389defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9390defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9391defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9392
// v16i32 -> v16i8.
9393defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9394defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9395defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9396
// v8i64 -> v8i16.
9397defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9398defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9399defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9400}
9401
// Defines the register (rr) and memory (rm) forms of one extend instruction
// at a single vector width, both through AVX512_maskable.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - scheduling class; sched.Folded is used for the rm form.
//   DestInfo       - type info of the extended result and execution domain.
//   SrcInfo        - type info of the register source.
//   x86memop       - memory operand of the rm form.
//   LdFrag         - load fragment matched by the rm pattern.
//   OpNode         - extend node matched by the rr pattern.
9402multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9403              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9404              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9405  let ExeDomain = DestInfo.ExeDomain in {
  // Register source: OpNode applied to the SrcInfo register.
9406  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9407                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9408                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9409                  EVEX, Sched<[sched]>;
9410
  // Memory source: the whole pattern is the LdFrag applied to the address.
9411  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9412                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9413                  (DestInfo.VT (LdFrag addr:$src))>,
9414                EVEX, Sched<[sched.Folded]>;
9415  }
9416}
9417
// i8 -> i16 extend at 128/256/512 bits. LdFrag defaults to the fragment named
// ExtTy # "extloadvi8". The 128-bit form matches InVecNode with an i64mem
// operand; the 256- and 512-bit forms match OpNode with i128mem / i256mem.
// All forms require BWI; the 128- and 256-bit forms also require VLX.
9418multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9419          SDNode OpNode, SDNode InVecNode, string ExtTy,
9420          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9421  let Predicates = [HasVLX, HasBWI] in {
9422    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9423                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9424                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9425
9426    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9427                    v16i8x_info, i128mem, LdFrag, OpNode>,
9428                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9429  }
9430  let Predicates = [HasBWI] in {
9431    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9432                    v32i8x_info, i256mem, LdFrag, OpNode>,
9433                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9434  }
9435}
9436
// i8 -> i32 extend at 128/256/512 bits. LdFrag defaults to the fragment named
// ExtTy # "extloadvi8". The source type info is v16i8x_info at every width.
// The 128- and 256-bit forms match InVecNode (i32mem / i64mem) and require
// VLX; the 512-bit form matches OpNode with i128mem.
9437multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9438          SDNode OpNode, SDNode InVecNode, string ExtTy,
9439          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9440  let Predicates = [HasVLX, HasAVX512] in {
9441    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9442                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9443                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9444
9445    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9446                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9447                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9448  }
9449  let Predicates = [HasAVX512] in {
9450    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9451                   v16i8x_info, i128mem, LdFrag, OpNode>,
9452                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9453  }
9454}
9455
// i8 -> i64 extend at 128/256/512 bits. LdFrag defaults to the fragment named
// ExtTy # "extloadvi8". The source type info is v16i8x_info at every width and
// every width matches InVecNode (OpNode is not forwarded). Memory operands
// are i16mem, i32mem and i64mem; the 128- and 256-bit forms require VLX.
9456multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9457          SDNode OpNode, SDNode InVecNode, string ExtTy,
9458          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9459  let Predicates = [HasVLX, HasAVX512] in {
9460    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9461                   v16i8x_info, i16mem, LdFrag, InVecNode>,
9462                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9463
9464    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9465                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9466                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9467  }
9468  let Predicates = [HasAVX512] in {
9469    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9470                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9471                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9472  }
9473}
9474
// i16 -> i32 extend at 128/256/512 bits. LdFrag defaults to the fragment named
// ExtTy # "extloadvi16". The 128-bit form matches InVecNode with i64mem; the
// 256- and 512-bit forms match OpNode with i128mem / i256mem. The 128- and
// 256-bit forms require VLX.
9475multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9476         SDNode OpNode, SDNode InVecNode, string ExtTy,
9477         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9478  let Predicates = [HasVLX, HasAVX512] in {
9479    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9480                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9481                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9482
9483    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9484                   v8i16x_info, i128mem, LdFrag, OpNode>,
9485                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9486  }
9487  let Predicates = [HasAVX512] in {
9488    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9489                   v16i16x_info, i256mem, LdFrag, OpNode>,
9490                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9491  }
9492}
9493
// i16 -> i64 extend at 128/256/512 bits. LdFrag defaults to the fragment named
// ExtTy # "extloadvi16". The source type info is v8i16x_info at every width.
// The 128- and 256-bit forms match InVecNode (i32mem / i64mem) and require
// VLX; the 512-bit form matches OpNode with i128mem.
9494multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9495         SDNode OpNode, SDNode InVecNode, string ExtTy,
9496         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9497  let Predicates = [HasVLX, HasAVX512] in {
9498    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9499                   v8i16x_info, i32mem, LdFrag, InVecNode>,
9500                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9501
9502    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9503                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9504                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9505  }
9506  let Predicates = [HasAVX512] in {
9507    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9508                   v8i16x_info, i128mem, LdFrag, OpNode>,
9509                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9510  }
9511}
9512
// i32 -> i64 extend at 128/256/512 bits. LdFrag defaults to the fragment named
// ExtTy # "extloadvi32". The 128-bit form matches InVecNode with i64mem; the
// 256- and 512-bit forms match OpNode with i128mem / i256mem. The 128- and
// 256-bit forms require VLX. Unlike the byte- and word-source variants, these
// defs are not tagged VEX_WIG.
9513multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9514         SDNode OpNode, SDNode InVecNode, string ExtTy,
9515         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9516
9517  let Predicates = [HasVLX, HasAVX512] in {
9518    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9519                   v4i32x_info, i64mem, LdFrag, InVecNode>,
9520                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9521
9522    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9523                   v4i32x_info, i128mem, LdFrag, OpNode>,
9524                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9525  }
9526  let Predicates = [HasAVX512] in {
9527    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9528                   v8i32x_info, i256mem, LdFrag, OpNode>,
9529                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9530  }
9531}
9532
// Zero-extend instructions: zext / zext_invec nodes, "z" load-fragment prefix,
// opcodes 0x30-0x35, WriteShuffle256 scheduling class.
9533defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9534defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9535defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9536defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9537defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9538defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9539
// Sign-extend instructions: sext / sext_invec nodes, "s" load-fragment prefix,
// opcodes 0x20-0x25, WriteShuffle256 scheduling class.
9540defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9541defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9542defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9543defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9544defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9545defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9546
9547
9548// Patterns that we also need any extend versions of. aext_vector_inreg
9549// is currently legalized to zext_vector_inreg.
// Patterns that fold a full-vector load feeding ExtOp into the rm form of the
// extend instruction whose name starts with OpcPrefix.
//   OpcPrefix - instruction name prefix; a size suffix such as "BWZ256rm" or
//               "DQZrm" is appended.
//   ExtOp     - extend node applied to the loaded vector.
9550multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9551  // 256-bit patterns
  // The byte -> word form needs BWI in addition to VLX.
9552  let Predicates = [HasVLX, HasBWI] in {
9553    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9554              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9555  }
9556
9557  let Predicates = [HasVLX] in {
9558    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9559              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9560
9561    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9562              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9563  }
9564
9565  // 512-bit patterns
  // The byte -> word form needs BWI.
9566  let Predicates = [HasBWI] in {
9567    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9568              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9569  }
9570  let Predicates = [HasAVX512] in {
9571    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9572              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9573    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9574              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9575
9576    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9577              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9578
9579    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9580              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9581  }
9582}
9583
// Load-folding patterns for the in-vector extend node InVecOp, added on top
// of the patterns inherited from AVX512_pmovx_patterns_base.
//
// Every pattern matches InVecOp applied to a bitcast of a vector that was
// built from a scalar load (scalar_to_vector of a load, or an X86vzload node)
// and selects the memory (rm) form of the matching OpcPrefix instruction, so
// the load is folded into the extend.
//
//   OpcPrefix - instruction name prefix ("VPMOVSX" / "VPMOVZX" at the
//               instantiation sites).
//   ExtOp     - forwarded to AVX512_pmovx_patterns_base.
//   InVecOp   - the *_invec extend node matched by the patterns below.
9584multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9585                                 SDNode InVecOp> :
9586    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9587  // 128-bit patterns
9588  let Predicates = [HasVLX, HasBWI] in {
9589  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9590            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9591  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9592            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9593  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9594            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9595  }
9596  let Predicates = [HasVLX] in {
9597  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9598            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9599  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9600            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9601
9602  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9603            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9604
9605  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9606            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9607  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9608            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9609  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9610            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9611
9612  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9613            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9614  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9615            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9616
9617  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9618            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9619  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9620            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9621  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9622            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9623  }
  // 256-bit patterns
9624  let Predicates = [HasVLX] in {
9625  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9626            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // An f64 load is wrapped as v2f64, exactly as in the 128-bit and 512-bit
  // loadf64 patterns of this multiclass.
9627  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9628            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9629  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9630            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9631
9632  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9633            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9634  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9635            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9636
9637  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9638            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // Same v2f64 wrapping for the f64 load as above.
9639  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9640            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9641  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9642            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9643  }
9644  // 512-bit patterns
9645  let Predicates = [HasAVX512] in {
9646  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9647            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9648  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9649            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9650  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9651            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9652  }
9653}
9654
// Instantiate the extend load-folding patterns once for sign extension
// (VPMOVSX: sext / sext_invec) and once for zero extension
// (VPMOVZX: zext / zext_invec).
9655defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9656defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9657
9658// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9659// ext+trunc aggressively making it impossible to legalize the DAG to this
9660// pattern directly.
// Select v16i16 -> v16i8 truncation as two instructions: VPMOVZXWDZ widens
// the source to v16i32 (register or folded-load form), then VPMOVDBZrr
// consumes that v16i32 to produce the v16i8 result.
9661let Predicates = [HasAVX512, NoBWI] in {
9662def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9663         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9664def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9665         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9666}
9667
9668//===----------------------------------------------------------------------===//
9669// GATHER - SCATTER Operations
9670
9671// FIXME: Improve scheduling of gather/scatter instructions.
// Defines the single `rm` form of a gather instruction.
//
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; _.Suffix is appended to it.
//   _         - vector type info for the destination/pass-through register.
//   memop     - memory operand for $src2.
//   MaskRC    - mask register class; defaults to _.KRCWM.
//
// Operand constraints: $dst is earlyclobber and tied to the pass-through
// input $src1, and the mask input $mask is tied to the second output
// $mask_wb. The instruction has no selection pattern ([]), so mayLoad and
// hasSideEffects are set explicitly.
9672multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9673                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
9674  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9675      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
9676  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9677            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9678            !strconcat(OpcodeStr#_.Suffix,
9679            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9680            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9681            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
9682}
9683
// Instantiates avx512_gather for the element types passed as `_` (64-bit
// element infos at the instantiation sites in this file).
//
// For each vector width (Z = 512, Z256, Z128) two records are produced:
//   NAME#D#SUFF#<width> - opcode dopc, mnemonic OpcodeStr#"d"
//   NAME#Q#SUFF#<width> - opcode qopc, mnemonic OpcodeStr#"q"
// All of them carry VEX_W. The data vector always uses the full-width info
// (info512/info256/info128); only the memory operand differs between the D
// and Q variants. The 256/128-bit records are guarded by HasVLX.
9684multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9685                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9686  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
9687                                      vy512xmem>, EVEX_V512, VEX_W;
9688  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
9689                                      vz512mem>, EVEX_V512, VEX_W;
9690let Predicates = [HasVLX] in {
9691  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
9692                              vx256xmem>, EVEX_V256, VEX_W;
9693  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
9694                              vy256xmem>, EVEX_V256, VEX_W;
9695  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
9696                              vx128xmem>, EVEX_V128, VEX_W;
9697  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9698                              vx128xmem>, EVEX_V128, VEX_W;
9699}
9700}
9701
// Instantiates avx512_gather for the element types passed as `_` (32-bit
// element infos at the instantiation sites in this file). No VEX_W is
// applied here.
//
// The D variants use the full-width vector info for each encoding length.
// The Q variants use the next narrower info for the data vector (info256
// under EVEX_V512, info128 under EVEX_V256), and the Z128 Q variant uses
// info128 with an explicit VK2WM mask class and the vx64xmem operand.
// The 256/128-bit records are guarded by HasVLX.
9702multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9703                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9704  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
9705                                       EVEX_V512;
9706  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
9707                                       EVEX_V512;
9708let Predicates = [HasVLX] in {
9709  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
9710                                          vy256xmem>, EVEX_V256;
9711  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9712                                          vy128xmem>, EVEX_V256;
9713  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
9714                                          vx128xmem>, EVEX_V128;
9715  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9716                                          vx64xmem, VK2WM>, EVEX_V128;
9717}
9718}
9719
9720
// Floating-point gathers: opcodes 0x92 (D index) / 0x93 (Q index), with the
// PD records built from the f64 infos and the PS records from the f32 infos.
9721defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9722               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9723
// Integer gathers: opcodes 0x90 (D index) / 0x91 (Q index), with the Q
// records built from the i64 infos and the D records from the i32 infos.
9724defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9725                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9726
// Defines the single `mr` form of a scatter instruction.
//
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; _.Suffix is appended to it.
//   _         - vector type info for the stored register $src.
//   memop     - memory operand for $dst.
//   MaskRC    - mask register class; defaults to _.KRCWM.
//
// The only output is $mask_wb, which is tied to the $mask input. The
// instruction has no selection pattern ([]), so mayStore and hasSideEffects
// are set explicitly.
9727multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9728                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
9729
9730let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
9731    hasSideEffects = 0 in
9732
9733  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9734            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9735            !strconcat(OpcodeStr#_.Suffix,
9736            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9737            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9738            Sched<[WriteStore]>;
9739}
9740
// Scatter counterpart of avx512_gather_q_pd: instantiates avx512_scatter
// with the same names, vector infos, memory operands and VEX_W as the gather
// multiclass. For each width (Z, Z256, Z128) a D record (dopc, "d" suffix)
// and a Q record (qopc, "q" suffix) are produced; the 256/128-bit records
// are guarded by HasVLX.
9741multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9742                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9743  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
9744                                      vy512xmem>, EVEX_V512, VEX_W;
9745  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
9746                                      vz512mem>, EVEX_V512, VEX_W;
9747let Predicates = [HasVLX] in {
9748  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
9749                              vx256xmem>, EVEX_V256, VEX_W;
9750  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
9751                              vy256xmem>, EVEX_V256, VEX_W;
9752  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
9753                              vx128xmem>, EVEX_V128, VEX_W;
9754  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9755                              vx128xmem>, EVEX_V128, VEX_W;
9756}
9757}
9758
// Scatter counterpart of avx512_gather_d_ps: instantiates avx512_scatter
// without VEX_W. The D records use the full-width vector info; the Q records
// use the next narrower info for the data vector (info256 under EVEX_V512,
// info128 under EVEX_V256), and the Z128 Q record uses info128 with an
// explicit VK2WM mask class and the vx64xmem operand. The 256/128-bit
// records are guarded by HasVLX.
9759multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9760                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9761  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
9762                                       EVEX_V512;
9763  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
9764                                       EVEX_V512;
9765let Predicates = [HasVLX] in {
9766  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
9767                                          vy256xmem>, EVEX_V256;
9768  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9769                                          vy128xmem>, EVEX_V256;
9770  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
9771                                          vx128xmem>, EVEX_V128;
9772  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9773                                          vx64xmem, VK2WM>, EVEX_V128;
9774}
9775}
9776
// Floating-point scatters: opcodes 0xA2 (D index) / 0xA3 (Q index), with the
// PD records built from the f64 infos and the PS records from the f32 infos.
9777defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9778               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9779
// Integer scatters: opcodes 0xA0 (D index) / 0xA1 (Q index), with the Q
// records built from the i64 infos and the D records from the i32 infos.
9780defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9781                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9782
9783// prefetch
// Defines the single `m` form shared by the gather and scatter prefetch
// instructions.
//
//   opc       - opcode byte.
//   F         - instruction Format (an MRMNm form selected by the caller).
//   OpcodeStr - full mnemonic (no suffix is appended here).
//   KRC       - mask register class for $mask.
//   memop     - memory operand for $src.
//
// The instruction has no outputs and no selection pattern. It is guarded by
// HasPFI and is marked as both mayLoad and mayStore.
9784multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9785                       RegisterClass KRC, X86MemOperand memop> {
9786  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9787  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9788            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9789            EVEX, EVEX_K, Sched<[WriteLoad]>;
9790}
9791
// Prefetch instantiations. The layout is regular:
//   - Format selects the operation: MRM1m = gather PF0, MRM2m = gather PF1,
//     MRM5m = scatter PF0, MRM6m = scatter PF1.
//   - Opcode 0xC6 is used for the D-index forms, 0xC7 for the Q-index forms.
//   - The PD forms carry VEX_W; the PS forms do not.
//   - Only the DPS forms use VK16WM; all other forms use VK8WM.
//   - All forms are EVEX_V512; the D-index forms use EVEX_CD8<32, CD8VT1> and
//     the Q-index forms use EVEX_CD8<64, CD8VT1>.
9792defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9793                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9794
9795defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9796                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9797
9798defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9799                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9800
9801defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9802                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9803
9804defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9805                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9806
9807defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9808                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9809
9810defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9811                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9812
9813defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9814                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9815
9816defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9817                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9818
9819defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9820                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9821
9822defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9823                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9824
9825defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9826                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9827
9828defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9829                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9830
9831defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9832                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9833
9834defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9835                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9836
9837defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9838                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9839
// Defines the `rr` form of a mask-to-vector conversion for one vector width.
// The instruction reads a mask register (Vec.KRC) and writes a vector
// register (Vec.RC); its selection pattern is a sign extension of the mask
// to Vec.VT. Vec.Suffix is appended to OpcodeStr to form the mnemonic.
9840multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9841def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9842                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9843                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9844                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9845}
9846
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit record (Z) requires only `prd`; the 256-bit and 128-bit
// records (Z256, Z128) additionally require HasVLX.
9847multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9848                                 string OpcodeStr, Predicate prd> {
9849let Predicates = [prd] in
9850  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9851
9852  let Predicates = [prd, HasVLX] in {
9853    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9854    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9855  }
9856}
9857
// Mask-to-vector moves. The byte/word forms use opcode 0x28 and require
// HasBWI; the dword/qword forms use opcode 0x38 and require HasDQI. The word
// and qword forms carry VEX_W. The shared "vpmovm2" mnemonic stem is
// completed by the vector info's Suffix inside cvt_by_vec_width.
9858defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9859defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9860defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9861defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9862
// Defines the `rr` form of a vector-to-mask conversion for one vector width.
// The instruction reads a vector register (_.RC) and writes a mask register
// (_.KRC). Its selection pattern is X86pcmpgtm with an all-zeros vector as
// the first operand and the source vector as the second. OpcodeStr is used
// as the full mnemonic (no suffix is appended).
9863multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9864    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9865                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9866                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9867                        EVEX, Sched<[WriteMove]>;
9868}
9869
9870// Use 512bit version to implement 128/256 bit in case NoVLX.
// Pattern-only multiclass: matches the same X86pcmpgtm(all-zeros, src) node
// as convert_vector_to_mask_common for the narrow type `_`, but selects the
// instruction named Name#"Zrr" operating on the wider ExtendInfo type.
//
// The narrow source is inserted at _.SubRegIdx into an IMPLICIT_DEF register
// of type ExtendInfo.VT, and the instruction's result is copied to the
// narrow mask class _.KRC.
9871multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9872                                           X86VectorVTInfo _,
9873                                           string Name> {
9874
9875  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9876            (_.KVT (COPY_TO_REGCLASS
9877                     (!cast<Instruction>(Name#"Zrr")
9878                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9879                                      _.RC:$src, _.SubRegIdx)),
9880                   _.KRC))>;
9881}
9882
// Instantiates the vector-to-mask conversion for all widths of VTInfo.
//   - Z (512-bit) requires `prd`.
//   - Z256 / Z128 are real instructions when HasVLX is also available.
//   - With NoVLX, Z256_Alt / Z128_Alt instead provide patterns that select
//     the 512-bit instruction (NAME#"Zrr") for the 256/128-bit types.
9883multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9884                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9885  let Predicates = [prd] in
9886    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9887                                            EVEX_V512;
9888
9889  let Predicates = [prd, HasVLX] in {
9890    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9891                                              EVEX_V256;
9892    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9893                                               EVEX_V128;
9894  }
9895  let Predicates = [prd, NoVLX] in {
9896    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9897    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9898  }
9899}
9900
// Vector-to-mask moves. The byte/word forms use opcode 0x29 and require
// HasBWI; the dword/qword forms use opcode 0x39 and require HasDQI. The word
// and qword forms carry VEX_W.
9901defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9902                                              avx512vl_i8_info, HasBWI>;
9903defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9904                                              avx512vl_i16_info, HasBWI>, VEX_W;
9905defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9906                                              avx512vl_i32_info, HasDQI>;
9907defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9908                                              avx512vl_i64_info, HasDQI>, VEX_W;
9909
9910// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9911// is available, but BWI is not. We can't handle this in lowering because
9912// a target independent DAG combine likes to combine sext and trunc.
// v16i1 sources: VPMOVM2DZrr produces a v16i32 from the mask, which is then
// fed to VPMOVDBZrr (v16i8 result) or VPMOVDWZrr (v16i16 result).
9913let Predicates = [HasDQI, NoBWI] in {
9914  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9915            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9916  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9917            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9918}
9919
// v8i1 -> v8i16 takes the same two-step route through v8i32 using the
// 256-bit instructions, which additionally requires HasVLX.
9920let Predicates = [HasDQI, NoBWI, HasVLX] in {
9921  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9922            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9923}
9924
9925//===----------------------------------------------------------------------===//
9926// AVX-512 - COMPRESS and EXPAND
9927//
9928
// Defines the compress instruction forms for one vector width:
//   rr  - register-to-register forms generated through AVX512_maskable with
//         null_frag, i.e. without an inline selection pattern.
//   mr  - unmasked store form (no pattern).
//   mrk - masked store form (no pattern; takes a _.KRCWM mask).
// Selection patterns for these records are supplied separately (see
// compress_by_vec_width_lowering).
9929multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9930                                 string OpcodeStr, X86FoldableSchedWrite sched> {
9931  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9932              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9933              (null_frag)>, AVX5128IBase,
9934              Sched<[sched]>;
9935
  // NOTE(review): this `let` has no braces, so it applies to `mr` only; `mrk`
  // below does not receive mayStore/hasSideEffects from it. Confirm that
  // `mrk` is meant to rely on flags inferred from its external pattern.
9936  let mayStore = 1, hasSideEffects = 0 in
9937  def mr : AVX5128I<opc, MRMDestMem, (outs),
9938              (ins _.MemOp:$dst, _.RC:$src),
9939              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9940              []>, EVEX_CD8<_.EltSize, CD8VT1>,
9941              Sched<[sched.Folded]>;
9942
9943  def mrk : AVX5128I<opc, MRMDestMem, (outs),
9944              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9945              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9946              []>,
9947              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9948              Sched<[sched.Folded]>;
9949}
9950
// Selection patterns for the compress instructions named
// Name#_.ZSuffix#<form>:
//   X86mCompressingStore(src, addr, mask)        -> mrk  (masked store)
//   X86compress(src, pass-through reg, mask)     -> rrk  (merge-masked)
//   X86compress(src, all-zeros vector, mask)     -> rrkz (zero-masked)
9951multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9952  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9953            (!cast<Instruction>(Name#_.ZSuffix#mrk)
9954                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
9955
9956  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9957            (!cast<Instruction>(Name#_.ZSuffix#rrk)
9958                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9959  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9960            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
9961                            _.KRCWM:$mask, _.RC:$src)>;
9962}
9963
// Instantiates the compress instructions and their lowering patterns for all
// three vector widths of VTInfo. The 512-bit records require `Pred`
// (HasAVX512 by default); the 256/128-bit records additionally require
// HasVLX. The same `sched` class is used for every width.
9964multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9965                                 X86FoldableSchedWrite sched,
9966                                 AVX512VLVectorVTInfo VTInfo,
9967                                 Predicate Pred = HasAVX512> {
9968  let Predicates = [Pred] in
9969  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9970           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9971
9972  let Predicates = [Pred, HasVLX] in {
9973    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9974                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9975    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9976                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9977  }
9978}
9979
9980// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Compress instantiations: integer forms use opcode 0x8B, floating-point
// forms use 0x8A; the 64-bit element forms carry VEX_W. All four are marked
// NotMemoryFoldable and use the default HasAVX512 predicate.
9981defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
9982                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
9983defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
9984                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
9985defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
9986                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
9987defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
9988                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
9989
9990// expand
// Defines the expand instruction forms for one vector width, both generated
// through AVX512_maskable with null_frag (no inline selection pattern):
//   rr - register source.
//   rm - memory source (_.MemOp), scheduled with the folded-load classes.
// Selection patterns are supplied separately (see
// expand_by_vec_width_lowering).
9991multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
9992                                 string OpcodeStr, X86FoldableSchedWrite sched> {
9993  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9994              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9995              (null_frag)>, AVX5128IBase,
9996              Sched<[sched]>;
9997
9998  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9999              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10000              (null_frag)>,
10001            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10002            Sched<[sched.Folded, sched.ReadAfterFold]>;
10003}
10004
// Selection patterns for the expand instructions named
// Name#_.ZSuffix#<form>:
//   X86mExpandingLoad(addr, mask, undef)          -> rmkz
//   X86mExpandingLoad(addr, mask, all-zeros)      -> rmkz
//   X86mExpandingLoad(addr, mask, pass-through)   -> rmk
//   X86expand(src, pass-through reg, mask)        -> rrk
//   X86expand(src, all-zeros vector, mask)        -> rrkz
// An undef pass-through is selected to the zero-masked form, the same as an
// explicit all-zeros pass-through.
10005multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10006
10007  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10008            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10009                                        _.KRCWM:$mask, addr:$src)>;
10010
10011  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10012            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10013                                        _.KRCWM:$mask, addr:$src)>;
10014
10015  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10016                                               (_.VT _.RC:$src0))),
10017            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10018                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10019
10020  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10021            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10022                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10023  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10024            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10025                            _.KRCWM:$mask, _.RC:$src)>;
10026}
10027
// Instantiates the expand instructions and their lowering patterns for all
// three vector widths of VTInfo. The 512-bit records require `Pred`
// (HasAVX512 by default); the 256/128-bit records additionally require
// HasVLX. The same `sched` class is used for every width.
10028multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10029                               X86FoldableSchedWrite sched,
10030                               AVX512VLVectorVTInfo VTInfo,
10031                               Predicate Pred = HasAVX512> {
10032  let Predicates = [Pred] in
10033  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10034           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10035
10036  let Predicates = [Pred, HasVLX] in {
10037    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10038                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10039    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10040                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10041  }
10042}
10043
10044// FIXME: Is there a better scheduler class for VPEXPAND?
// Expand instantiations: integer forms use opcode 0x89, floating-point forms
// use 0x88; the 64-bit element forms carry VEX_W. All use the default
// HasAVX512 predicate.
10045defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10046                                      avx512vl_i32_info>, EVEX;
10047defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10048                                      avx512vl_i64_info>, EVEX, VEX_W;
10049defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10050                                      avx512vl_f32_info>, EVEX;
10051defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10052                                      avx512vl_f64_info>, EVEX, VEX_W;
10053
10054// Handle instructions of the form  reg_vec1 = op(reg_vec,imm)
10055//                                              op(mem_vec,imm)
10056//                                              op(broadcast(eltVt),imm)
10057// All instructions are created with FROUND_CURRENT.
// Unary packed FP operation with an 8-bit immediate, for one vector width.
//
//   OpNode     - node used for the unmasked selection pattern.
//   MaskOpNode - node used for the masked selection patterns (the two are
//                passed separately to AVX512_maskable_split).
//
// Forms defined (each takes the immediate as $src2, matched as i32 timm):
//   rri  - register source.
//   rmi  - full-vector memory source (_.LdFrag).
//   rmbi - broadcast scalar memory source (_.BroadcastLdFrag), with EVEX_B.
// All forms read MXCSR and are marked mayRaiseFPException. _.Suffix is
// appended to OpcodeStr.
10058multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10059                                      SDPatternOperator OpNode,
10060                                      SDPatternOperator MaskOpNode,
10061                                      X86FoldableSchedWrite sched,
10062                                      X86VectorVTInfo _> {
10063  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10064  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10065                      (ins _.RC:$src1, i32u8imm:$src2),
10066                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10067                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10068                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10069                      Sched<[sched]>;
10070  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10071                    (ins _.MemOp:$src1, i32u8imm:$src2),
10072                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10073                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10074                            (i32 timm:$src2)),
10075                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10076                                (i32 timm:$src2))>,
10077                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10078  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10079                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10080                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10081                    "${src1}"#_.BroadcastStr#", $src2",
10082                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10083                            (i32 timm:$src2)),
10084                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10085                                (i32 timm:$src2))>, EVEX_B,
10086                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10087  }
10088}
10089
10090// Handle instructions of the form  reg_vec1 = op(reg_vec2,imm),{sae}
// {sae} register form (rrib) of a unary packed FP operation with immediate.
// It takes one vector source ($src1) and the immediate ($src2), prints
// "{sae}" between them in the assembly string, and carries EVEX_B.
// This `let` sets Uses = [MXCSR] but does not set mayRaiseFPException.
10091multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10092                                          SDNode OpNode, X86FoldableSchedWrite sched,
10093                                          X86VectorVTInfo _> {
10094  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10095  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10096                      (ins _.RC:$src1, i32u8imm:$src2),
10097                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10098                      "$src1, {sae}, $src2",
10099                      (OpNode (_.VT _.RC:$src1),
10100                              (i32 timm:$src2))>,
10101                      EVEX_B, Sched<[sched]>;
10102}
10103
// Instantiates the unary packed FP-with-immediate instructions for all three
// vector widths. Only the 512-bit record (Z) also gets the {sae} form, built
// from OpNodeSAE; Z128 and Z256 get the regular forms only and additionally
// require HasVLX. The scheduling class is picked per width from `sched`
// (ZMM / YMM / XMM).
10104multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10105            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10106            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10107            Predicate prd>{
10108  let Predicates = [prd] in {
10109    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10110                                           sched.ZMM, _.info512>,
10111                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10112                                               sched.ZMM, _.info512>, EVEX_V512;
10113  }
10114  let Predicates = [prd, HasVLX] in {
10115    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10116                                           sched.XMM, _.info128>, EVEX_V128;
10117    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10118                                           sched.YMM, _.info256>, EVEX_V256;
10119  }
10120}
10121
10122// Handle instructions of the form  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10123//                                              op(reg_vec2,mem_vec,imm)
10124//                                              op(reg_vec2,broadcast(eltVt),imm)
10125// All instructions are created with FROUND_CURRENT.
// Binary packed FP operation with an 8-bit immediate, for one vector width.
// The first source ($src1) is always a register; the immediate is $src3,
// matched as i32 timm. Forms defined:
//   rri  - second source is a register.
//   rmi  - second source is a full-vector load (_.LdFrag).
//   rmbi - second source is a broadcast scalar load (_.BroadcastLdFrag),
//          with EVEX_B.
// All forms read MXCSR and are marked mayRaiseFPException. OpcodeStr is used
// as given (no suffix is appended here).
10126multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10127                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10128  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10129  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10130                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10131                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10132                      (OpNode (_.VT _.RC:$src1),
10133                              (_.VT _.RC:$src2),
10134                              (i32 timm:$src3))>,
10135                      Sched<[sched]>;
10136  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10137                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10138                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10139                    (OpNode (_.VT _.RC:$src1),
10140                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10141                            (i32 timm:$src3))>,
10142                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10143  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10144                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10145                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10146                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10147                    (OpNode (_.VT _.RC:$src1),
10148                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10149                            (i32 timm:$src3))>, EVEX_B,
10150                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10151  }
10152}
10153
10154//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10155//                               op(reg_vec2,mem_vec,imm)
// Two-source operation with an 8-bit immediate where the source and
// destination vector types may differ.
//
//   DestInfo - type info for the result (and for the masking generated by
//              AVX512_maskable).
//   SrcInfo  - type info for both vector sources.
//
// Forms defined (immediate is $src3, matched as i8 timm):
//   rri - second source is a register.
//   rmi - second source is a full-vector load (SrcInfo.LdFrag).
// No broadcast form is defined here, and no MXCSR use is declared.
10156multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10157                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10158                              X86VectorVTInfo SrcInfo>{
10159  let ExeDomain = DestInfo.ExeDomain in {
10160  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10161                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10162                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10163                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10164                               (SrcInfo.VT SrcInfo.RC:$src2),
10165                               (i8 timm:$src3)))>,
10166                  Sched<[sched]>;
10167  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10168                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10169                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10170                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10171                             (SrcInfo.VT (bitconvert
10172                                                (SrcInfo.LdFrag addr:$src2))),
10173                             (i8 timm:$src3)))>,
10174                Sched<[sched.Folded, sched.ReadAfterFold]>;
10175  }
10176}
10177
10178//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10179//                               op(reg_vec2,mem_vec,imm)
10180//                               op(reg_vec2,broadcast(eltVt),imm)
// Same-type variant of avx512_3Op_rm_imm8: inherits its rri/rmi forms with
// `_` used as both DestInfo and SrcInfo, and adds the rmbi form whose second
// source is a broadcast scalar load (_.BroadcastLdFrag) with EVEX_B.
10181multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10182                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10183  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10184
10185  let ExeDomain = _.ExeDomain in
10186  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10187                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10188                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10189                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10190                    (OpNode (_.VT _.RC:$src1),
10191                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10192                            (i8 timm:$src3))>, EVEX_B,
10193                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10194}
10195
10196//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10197//                                      op(reg_vec2,mem_scalar,imm)
// Scalar floating-point instruction with an immediate, in register (rri) and
// scalar-memory (rmi) forms, built through AVX512_maskable_scalar. The
// immediate operand is i32u8imm and is matched as (i32 timm). Both forms are
// marked as using MXCSR and as possibly raising an FP exception.
10198multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10199                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10200  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10201  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10202                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10203                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10204                      (OpNode (_.VT _.RC:$src1),
10205                              (_.VT _.RC:$src2),
10206                              (i32 timm:$src3))>,
10207                      Sched<[sched]>;
  // The memory form takes _.IntScalarMemOp and matches the load through
  // _.ScalarIntMemFrags.
10208  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10209                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10210                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10211                    (OpNode (_.VT _.RC:$src1),
10212                            (_.ScalarIntMemFrags addr:$src2),
10213                            (i32 timm:$src3))>,
10214                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10215  }
10216}
10217
10218//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Packed register-only form (rrib) whose asm strings carry "{sae}". The record
// is tagged EVEX_B and uses MXCSR; mayRaiseFPException is not set here.
// NOTE(review): presumably the flag is omitted because {sae} suppresses
// exceptions - confirm. OpNode is expected to be the SAE flavour of the node
// (callers in this file pass an *SAE node for this parameter).
10219multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10220                                    SDNode OpNode, X86FoldableSchedWrite sched,
10221                                    X86VectorVTInfo _> {
10222  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10223  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10224                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10225                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10226                      "$src1, $src2, {sae}, $src3",
10227                      (OpNode (_.VT _.RC:$src1),
10228                              (_.VT _.RC:$src2),
10229                              (i32 timm:$src3))>,
10230                      EVEX_B, Sched<[sched]>;
10231}
10232
10233//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar register-only form (rrib) whose asm strings carry "{sae}", built
// through AVX512_maskable_scalar and tagged EVEX_B. It uses MXCSR;
// mayRaiseFPException is not set here. The record name is spelled explicitly
// as NAME#rrib.
10234multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10235                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10236  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10237  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10238                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10239                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10240                      "$src1, $src2, {sae}, $src3",
10241                      (OpNode (_.VT _.RC:$src1),
10242                              (_.VT _.RC:$src2),
10243                              (i32 timm:$src3))>,
10244                      EVEX_B, Sched<[sched]>;
10245}
10246
// Instantiates a packed FP immediate instruction at all three vector widths.
//   Z    (512-bit): avx512_fp_packed_imm plus the {sae} form, under [prd].
//   Z128 / Z256   : avx512_fp_packed_imm only, under [prd, HasVLX].
// avx512_fp_packed_imm is defined outside this section. OpNode is used for
// the regular forms and OpNodeSAE only for the 512-bit {sae} form.
10247multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10248            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10249            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10250  let Predicates = [prd] in {
10251    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10252                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10253                                  EVEX_V512;
10254
10255  }
10256  let Predicates = [prd, HasVLX] in {
10257    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10258                                  EVEX_V128;
10259    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10260                                  EVEX_V256;
10261  }
10262}
10263
// Instantiates avx512_3Op_rm_imm8 (rri/rmi, no broadcast form) at 512, 128
// and 256 bits with separate destination and source type families. Every
// width is tagged AVX512AIi8Base and EVEX_4V here. The 512-bit form requires
// Pred; the 128/256-bit forms additionally require HasVLX. Pred defaults to
// HasBWI.
10264multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10265                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10266                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10267  let Predicates = [Pred] in {
10268    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10269                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10270  }
10271  let Predicates = [Pred, HasVLX] in {
10272    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10273                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10274    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10275                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10276  }
10277}
10278
// Instantiates avx512_3Op_imm8 (rri/rmi/rmbi) at 512, 128 and 256 bits for a
// single type family. Only the vector-length tag (EVEX_V512/V128/V256) is
// applied here; any other encoding classes must come from the user of this
// multiclass. Pred defaults to HasAVX512; 128/256-bit forms also need HasVLX.
10279multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10280                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10281                                  Predicate Pred = HasAVX512> {
10282  let Predicates = [Pred] in {
10283    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10284                                EVEX_V512;
10285  }
10286  let Predicates = [Pred, HasVLX] in {
10287    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10288                                EVEX_V128;
10289    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10290                                EVEX_V256;
10291  }
10292}
10293
// Scalar counterpart of the packed FP immediate wrapper: a single Z
// instantiation that combines the regular scalar forms (avx512_fp_scalar_imm,
// using OpNode) with the {sae} form (avx512_fp_sae_scalar_imm, using
// OpNodeSAE). Both use the XMM scheduling class and are guarded by prd.
10294multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10295                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10296                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10297  let Predicates = [prd] in {
10298     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10299              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10300  }
10301}
10302
// Instantiates avx512_common_unary_fp_sae_packed_imm (defined outside this
// section) twice: PS over avx512vl_f32_info with opcode opcPs, and PD over
// avx512vl_f64_info with opcode opcPd (PD is additionally tagged VEX_W).
// OpcodeStr is forwarded unchanged to both.
// NOTE(review): the "ps"/"pd" mnemonic suffix is presumably appended further
// down from the type info - confirm in the inner multiclass.
10303multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10304                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10305                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10306                    X86SchedWriteWidths sched, Predicate prd>{
10307  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10308                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10309                            EVEX_CD8<32, CD8VF>;
10310  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10311                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10312                            EVEX_CD8<64, CD8VF>, VEX_W;
10313}
10314
// Packed unary FP-with-immediate instructions. Argument order is
// (mnemonic, PS opcode, PD opcode, unmasked node, masked node, SAE node,
// scheduling class, predicate). VREDUCE requires HasDQI; VRNDSCALE and
// VGETMANT require HasAVX512. VRNDSCALE is the only one with different PS/PD
// opcodes and with distinct unmasked (X86any_VRndScale) and masked
// (X86VRndScale) nodes.
10315defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10316                              X86VReduce, X86VReduce, X86VReduceSAE,
10317                              SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
10318defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10319                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10320                              SchedWriteFRnd, HasAVX512>,
10321                              AVX512AIi8Base, EVEX;
10322defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10323                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
10324                              SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
10325
// Packed binary VRANGE (opcode 0x50, HasDQI). The PD form is tagged VEX_W and
// uses 64-bit EVEX_CD8 scaling; the PS form uses 32-bit scaling.
10326defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10327                                                0x50, X86VRange, X86VRangeSAE,
10328                                                SchedWriteFAdd, HasDQI>,
10329      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10330defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10331                                                0x50, X86VRange, X86VRangeSAE,
10332                                                SchedWriteFAdd, HasDQI>,
10333      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10334
// Scalar counterparts. All are VEX_LIG with CD8VT1 scaling; the SD forms are
// additionally VEX_W. VRANGES* use opcode 0x51, VREDUCES* 0x57 and
// VGETMANTS* 0x27. VRANGES*/VREDUCES* require HasDQI, VGETMANTS* HasAVX512.
10335defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10336      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10337      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10338defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10339      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10340      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10341
10342defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10343      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10344      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10345defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10346      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10347      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10348
10349defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10350      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10351      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10352defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10353      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10354      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10355
// 128-bit-chunk shuffle with an 8-bit immediate, in register (rri), memory
// (rmi) and broadcast-memory (rmbi) forms. In every pattern the X86Shuf128
// node is typed as CastInfo.VT and the result is bitconverted to _.VT, so the
// instruction's element type (_) may differ from the type the node is matched
// in (CastInfo). rri and rmi carry an EVEX2VEXOverride built from
// EVEX2VEXOvrd plus "rr"/"rm"; rmbi has no override and is tagged EVEX_B.
10356multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10357                                          X86FoldableSchedWrite sched,
10358                                          X86VectorVTInfo _,
10359                                          X86VectorVTInfo CastInfo,
10360                                          string EVEX2VEXOvrd> {
10361  let ExeDomain = _.ExeDomain in {
10362  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10363                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10364                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10365                  (_.VT (bitconvert
10366                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10367                                                  (i8 timm:$src3)))))>,
10368                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // The full-vector load is matched with CastInfo.LdFrag, i.e. in the cast
  // type rather than in _.VT.
10369  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10370                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10371                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10372                (_.VT
10373                 (bitconvert
10374                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
10375                                           (CastInfo.LdFrag addr:$src2),
10376                                           (i8 timm:$src3)))))>,
10377                Sched<[sched.Folded, sched.ReadAfterFold]>,
10378                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // The broadcast load, by contrast, is matched with _.BroadcastLdFrag (the
  // instruction's own element type).
10379  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10380                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10381                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10382                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10383                    (_.VT
10384                     (bitconvert
10385                      (CastInfo.VT
10386                       (X86Shuf128 _.RC:$src1,
10387                                   (_.BroadcastLdFrag addr:$src2),
10388                                   (i8 timm:$src3)))))>, EVEX_B,
10389                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10390  }
10391}
10392
// Instantiates the 128-bit-chunk shuffle at 512 bits (Z, HasAVX512) and 256
// bits (Z256, HasAVX512 + HasVLX). No 128-bit form is defined. The 512-bit
// form is given an empty EVEX2VEX override prefix; only the 256-bit form
// receives EVEX2VEXOvrd. The same sched value is used for both widths.
10393multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10394                                   AVX512VLVectorVTInfo _,
10395                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10396                                   string EVEX2VEXOvrd>{
10397  let Predicates = [HasAVX512] in
10398  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10399                                          _.info512, CastInfo.info512, "">, EVEX_V512;
10400
10401  let Predicates = [HasAVX512, HasVLX] in
10402  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10403                                             _.info256, CastInfo.info256,
10404                                             EVEX2VEXOvrd>, EVEX_V256;
10405}
10406
// 128-bit-chunk shuffles. The FP forms use opcode 0x23 and the "VPERM2F128"
// override prefix; the integer forms use opcode 0x43 and "VPERM2I128". The
// 32x4 variants are matched in the 64-bit element type family (CastInfo) and
// bitconverted back; the 64x2 variants use the same family for both and are
// tagged VEX_W.
10407defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10408      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10409defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10410      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10411defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10412      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10413defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10414      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10415
// VALIGN-style instruction matched on X86VAlign with an 8-bit immediate, in
// register (rri), memory (rmi) and broadcast-memory (rmbi) forms. rri and rmi
// are given EVEX2VEXOverride values naming VPALIGNRrri / VPALIGNRrmi; rmbi has
// no override and is tagged EVEX_B.
10416multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10417                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10418  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10419  // instantiation of this class.
10420  let ExeDomain = _.ExeDomain in {
10421  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10422                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10423                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10424                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10425                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10426  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10427                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10428                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10429                (_.VT (X86VAlign _.RC:$src1,
10430                                 (bitconvert (_.LdFrag addr:$src2)),
10431                                 (i8 timm:$src3)))>,
10432                Sched<[sched.Folded, sched.ReadAfterFold]>,
10433                EVEX2VEXOverride<"VPALIGNRrmi">;
10434
10435  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10436                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10437                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10438                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
10439                   (X86VAlign _.RC:$src1,
10440                              (_.VT (_.BroadcastLdFrag addr:$src2)),
10441                              (i8 timm:$src3))>, EVEX_B,
10442                   Sched<[sched.Folded, sched.ReadAfterFold]>;
10443  }
10444}
10445
// Instantiates avx512_valign at 512, 128 and 256 bits with the fixed opcode
// 0x03. The 512-bit form requires HasAVX512; the 128/256-bit forms also
// require HasVLX. For the 256-bit form the EVEX2VEXOverride set inside
// avx512_valign is reset to unset ('?').
10446multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10447                                AVX512VLVectorVTInfo _> {
10448  let Predicates = [HasAVX512] in {
10449    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10450                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
10451  }
10452  let Predicates = [HasAVX512, HasVLX] in {
10453    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10454                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
10455    // We can't really override the 256-bit version so change it back to unset.
10456    let EVEX2VEXOverride = ? in
10457    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10458                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
10459  }
10460}
10461
// VALIGND / VALIGNQ over 32-bit and 64-bit integer elements (VALIGNQ is
// tagged VEX_W).
10461defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10462                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10463defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10464                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10465                                   VEX_W;
10466
// VPALIGNR over byte elements, opcode 0x0F, matched on X86PAlignr. It has
// rri/rmi forms only (no broadcast form) and relies on the multiclass default
// predicate (HasBWI).
10467defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10468                                         SchedWriteShuffle, avx512vl_i8_info,
10469                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10471
10471// Fragments to help convert valignq into masked valignd. Or valignq/valignd
10472// into vpalignr.
// Each transform rescales the immediate by a constant factor and returns it
// as an i8 immediate. In the lowering instantiations later in this file they
// are paired as follows:
//   ValignqImm32XForm (x2): 64-bit element source -> 32-bit element VALIGND
//   ValignqImm8XForm  (x8): 64-bit element source -> byte-element VPALIGNR
//   ValigndImm8XForm  (x4): 32-bit element source -> byte-element VPALIGNR
10473def ValignqImm32XForm : SDNodeXForm<timm, [{
10474  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10475}]>;
10476def ValignqImm8XForm : SDNodeXForm<timm, [{
10477  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10478}]>;
10479def ValigndImm8XForm : SDNodeXForm<timm, [{
10480  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10481}]>;
10483
// Selection patterns for a masked select whose data operand is an align node
// computed in type From and bitconverted to type To. Each pattern selects the
// instruction named OpcodeStr plus a suffix, operating on To's register
// class, and rewrites the immediate with ImmXForm:
//   rrik  - register source, merge with $src0
//   rrikz - register source, zero-masking
//   rmik  - memory source (From.LdFrag), merge with $src0
//   rmikz - memory source (From.LdFrag), zero-masking
// The mask is To.KRCWM, i.e. it is expressed in To's element count.
// NOTE(review): the k/kz-suffixed records are presumably produced by
// AVX512_maskable - confirm against its definition.
10483multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10484                                        X86VectorVTInfo From, X86VectorVTInfo To,
10485                                        SDNodeXForm ImmXForm> {
  // Register source, merge-masking.
10486  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10487                                 (bitconvert
10488                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10489                                                   timm:$src3))),
10490                                 To.RC:$src0)),
10491            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10492                                                  To.RC:$src1, To.RC:$src2,
10493                                                  (ImmXForm timm:$src3))>;
10494
  // Register source, zero-masking.
10495  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10496                                 (bitconvert
10497                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10498                                                   timm:$src3))),
10499                                 To.ImmAllZerosV)),
10500            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10501                                                   To.RC:$src1, To.RC:$src2,
10502                                                   (ImmXForm timm:$src3))>;
10503
  // Memory source, merge-masking.
10504  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10505                                 (bitconvert
10506                                  (From.VT (OpNode From.RC:$src1,
10507                                                   (From.LdFrag addr:$src2),
10508                                           timm:$src3))),
10509                                 To.RC:$src0)),
10510            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10511                                                  To.RC:$src1, addr:$src2,
10512                                                  (ImmXForm timm:$src3))>;
10513
  // Memory source, zero-masking.
10514  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10515                                 (bitconvert
10516                                  (From.VT (OpNode From.RC:$src1,
10517                                                   (From.LdFrag addr:$src2),
10518                                           timm:$src3))),
10519                                 To.ImmAllZerosV)),
10520            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10521                                                   To.RC:$src1, addr:$src2,
10522                                                   (ImmXForm timm:$src3))>;
10523}
10525
// Extends avx512_vpalign_mask_lowering with patterns for a second operand
// that is a broadcast load in type To, bitconverted to From before reaching
// the align node. Three patterns are added:
//   rmbi   - unmasked
//   rmbik  - merge-masking with $src0
//   rmbikz - zero-masking
// As in the base multiclass, the immediate is rewritten with ImmXForm and the
// selected instruction operates on To's register class.
10525multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10526                                           X86VectorVTInfo From,
10527                                           X86VectorVTInfo To,
10528                                           SDNodeXForm ImmXForm> :
10529      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked: the node itself is typed From.VT; only the broadcast is in To.
10530  def : Pat<(From.VT (OpNode From.RC:$src1,
10531                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10532                             timm:$src3)),
10533            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10534                                                  (ImmXForm timm:$src3))>;
10535
  // Broadcast source, merge-masking.
10536  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10537                                 (bitconvert
10538                                  (From.VT (OpNode From.RC:$src1,
10539                                           (bitconvert
10540                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
10541                                           timm:$src3))),
10542                                 To.RC:$src0)),
10543            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10544                                                   To.RC:$src1, addr:$src2,
10545                                                   (ImmXForm timm:$src3))>;
10546
  // Broadcast source, zero-masking.
10547  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10548                                 (bitconvert
10549                                  (From.VT (OpNode From.RC:$src1,
10550                                           (bitconvert
10551                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
10552                                           timm:$src3))),
10553                                 To.ImmAllZerosV)),
10554            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10555                                                    To.RC:$src1, addr:$src2,
10556                                                    (ImmXForm timm:$src3))>;
10557}
10559
// Instantiations of the masked-align lowering patterns. In each, the align
// node is matched in the From type (third argument of the instantiation
// arguments listed after the node) and selected as the named instruction in
// the To type, with the immediate rescaled by the given transform.
10559let Predicates = [HasAVX512] in {
10560  // For 512-bit we lower to the widest element type we can. So we only need
10561  // to handle converting valignq to valignd.
10562  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10563                                         v16i32_info, ValignqImm32XForm>;
10564}
10565
10566let Predicates = [HasVLX] in {
10567  // For 128-bit we lower to the widest element type we can. So we only need
10568  // to handle converting valignq to valignd.
10569  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10570                                         v4i32x_info, ValignqImm32XForm>;
10571  // For 256-bit we lower to the widest element type we can. So we only need
10572  // to handle converting valignq to valignd.
10573  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10574                                         v8i32x_info, ValignqImm32XForm>;
10575}
10576
10577let Predicates = [HasVLX, HasBWI] in {
10578  // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR. Only the 128-bit
  // forms are instantiated here, and without the broadcast patterns.
  // NOTE(review): a 256-bit form is presumably omitted because VPALIGNR
  // operates within 128-bit lanes - confirm.
10579  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10580                                      v16i8x_info, ValignqImm8XForm>;
10581  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10582                                      v16i8x_info, ValigndImm8XForm>;
10583}
10585
// VDBPSADBW: opcode 0x42, matched on X86dbpsadbw. The destination type family
// is 16-bit integers while the sources are 8-bit integers. It uses the
// multiclass default predicate and is marked NotEVEX2VEXConvertible.
10585defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10586                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10587                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10589
// Unary vector instruction in register (rr) and full-vector memory (rm)
// forms, both tagged EVEX and AVX5128IBase. The memory form additionally sets
// EVEX_CD8 from the element size and is scheduled with sched.Folded only.
10589multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10590                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10591  let ExeDomain = _.ExeDomain in {
10592  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10593                    (ins _.RC:$src1), OpcodeStr,
10594                    "$src1", "$src1",
10595                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10596                    Sched<[sched]>;
10597
10598  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10599                  (ins _.MemOp:$src1), OpcodeStr,
10600                  "$src1", "$src1",
10601                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10602            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10603            Sched<[sched.Folded]>;
10604  }
10605}
10607
// Extends avx512_unary_rm with a broadcast-memory form (rmb): the operand is
// a scalar memory location matched through _.BroadcastLdFrag, the asm strings
// append _.BroadcastStr, and the record is tagged EVEX_B.
// Note that rmb is not wrapped in a 'let ExeDomain' here, unlike the rr/rm
// forms it inherits.
10607multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10608                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10609           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10610  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10611                  (ins _.ScalarMemOp:$src1), OpcodeStr,
10612                  "${src1}"#_.BroadcastStr,
10613                  "${src1}"#_.BroadcastStr,
10614                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10615             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10616             Sched<[sched.Folded]>;
10617}
10619
// Instantiates avx512_unary_rm (rr/rm, no broadcast) at 512 bits under [prd]
// and at 256/128 bits under [prd, HasVLX], each with the matching
// vector-length tag and per-width scheduling class.
10619multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10620                              X86SchedWriteWidths sched,
10621                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10622  let Predicates = [prd] in
10623    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10624                             EVEX_V512;
10625
10626  let Predicates = [prd, HasVLX] in {
10627    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10628                              EVEX_V256;
10629    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10630                              EVEX_V128;
10631  }
10632}
10634
// Same as avx512_unary_rm_vl but built on avx512_unary_rmb, so every width
// also gets the broadcast-memory form. 512 bits under [prd]; 256/128 bits
// under [prd, HasVLX].
10634multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10635                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10636                               Predicate prd> {
10637  let Predicates = [prd] in
10638    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10639                              EVEX_V512;
10640
10641  let Predicates = [prd, HasVLX] in {
10642    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10643                                 EVEX_V256;
10644    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10645                                 EVEX_V128;
10646  }
10647}
10649
// Dword/qword pair of a unary instruction, both with broadcast forms:
//   Q - OpcodeStr + "q" over 64-bit integers, opcode opc_q, tagged VEX_W
//   D - OpcodeStr + "d" over 32-bit integers, opcode opc_d
10649multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10650                                 SDNode OpNode, X86SchedWriteWidths sched,
10651                                 Predicate prd> {
10652  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10653                               avx512vl_i64_info, prd>, VEX_W;
10654  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10655                               avx512vl_i32_info, prd>;
10656}
10658
// Byte/word pair of a unary instruction, without broadcast forms (built on
// avx512_unary_rm_vl). Both are tagged VEX_WIG:
//   W - OpcodeStr + "w" over 16-bit integers, opcode opc_w
//   B - OpcodeStr + "b" over 8-bit integers, opcode opc_b
10658multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10659                                 SDNode OpNode, X86SchedWriteWidths sched,
10660                                 Predicate prd> {
10661  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10662                              avx512vl_i16_info, prd>, VEX_WIG;
10663  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10664                              avx512vl_i8_info, prd>, VEX_WIG;
10665}
10667
// All four integer element sizes of a unary instruction under one name: the
// dword/qword forms are guarded by HasAVX512 and the byte/word forms by
// HasBWI.
10667multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10668                                  bits<8> opc_d, bits<8> opc_q,
10669                                  string OpcodeStr, SDNode OpNode,
10670                                  X86SchedWriteWidths sched> {
10671  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10672                                    HasAVX512>,
10673              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10674                                    HasBWI>;
10675}
10677
// VPABS{B,W,D,Q}, matched on 'abs'. Opcodes are given in b, w, d, q order.
10677defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10678                                    SchedWriteVecALU>;
10679
10680// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the 64-bit element types (v4i64, v2i64) are handled here. The source
// is inserted into an undefined v8i64 at the ymm/xmm subregister, VPABSQZrr
// is applied, and the same subregister is extracted from the result.
10681let Predicates = [HasAVX512, NoVLX] in {
10682  def : Pat<(v4i64 (abs VR256X:$src)),
10683            (EXTRACT_SUBREG
10684                (VPABSQZrr
10685                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10686             sub_ymm)>;
10687  def : Pat<(v2i64 (abs VR128X:$src)),
10688            (EXTRACT_SUBREG
10689                (VPABSQZrr
10690                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10691             sub_xmm)>;
10692}
10694
10694// Use 512bit version to implement 128/256 bit.
// Generic form of the widening trick for unary nodes when VLX is unavailable
// ([prd, NoVLX]). For both the 256-bit and 128-bit types of the family, the
// source is inserted into an undefined 512-bit value at the type's
// SubRegIdx, the instruction named InstrStr + "Zrr" is applied, and the same
// subregister is extracted from the result.
10695multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10696                                 AVX512VLVectorVTInfo _, Predicate prd> {
10697  let Predicates = [prd, NoVLX] in {
    // 256-bit operand widened to 512 bits.
10698    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10699              (EXTRACT_SUBREG
10700                (!cast<Instruction>(InstrStr # "Zrr")
10701                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10702                                 _.info256.RC:$src1,
10703                                 _.info256.SubRegIdx)),
10704              _.info256.SubRegIdx)>;
10705
    // 128-bit operand widened to 512 bits.
10706    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10707              (EXTRACT_SUBREG
10708                (!cast<Instruction>(InstrStr # "Zrr")
10709                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10710                                 _.info128.RC:$src1,
10711                                 _.info128.SubRegIdx)),
10712              _.info128.SubRegIdx)>;
10713  }
10714}
10716
// VPLZCNTD/Q: matched on 'ctlz', opcode 0x44 for both element sizes, guarded
// by HasCDI.
10716defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10717                                        SchedWriteVecIMul, HasCDI>;
10718
10719// FIXME: Is there a better scheduler class for VPCONFLICT?
// VPCONFLICTD/Q: matched on X86Conflict, opcode 0xC4 for both element sizes,
// guarded by HasCDI. No NoVLX widening patterns are instantiated for it in
// this section.
10720defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10721                                        SchedWriteVecALU, HasCDI>;
10722
10723// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10724defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10725defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10726
10727//===---------------------------------------------------------------------===//
10728// Counts number of ones - VPOPCNTD and VPOPCNTQ
10729//===---------------------------------------------------------------------===//
10730
10731// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// Matched on 'ctpop', opcode 0x55 for both element sizes, guarded by
// HasVPOPCNTDQ.
10732defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10733                                     SchedWriteVecALU, HasVPOPCNTDQ>;
10734
// VPOPCNT: use the 512-bit version to implement 128/256 bit when VLX is not
// available.
10735defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10736defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10738
10739//===---------------------------------------------------------------------===//
10740// Replicate Single FP - MOVSHDUP and MOVSLDUP
10741//===---------------------------------------------------------------------===//
10742
10743multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10744                            X86SchedWriteWidths sched> {
10745  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10746                                      avx512vl_f32_info, HasAVX512>, XS;
10747}
10748
10749defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10750                                  SchedWriteFShuffle>;
10751defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10752                                  SchedWriteFShuffle>;
10753
10754//===----------------------------------------------------------------------===//
10755// AVX-512 - MOVDDUP
10756//===----------------------------------------------------------------------===//
10757
// 128-bit MOVDDUP. Unlike the wider forms, this one is matched as a
// broadcast: the register form matches X86VBroadcast of the source vector and
// the memory form matches the type's broadcast-load fragment.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   sched     - scheduling class; `sched.Folded` is used for the load form.
//   _         - vector type info supplying RC, VT, ScalarMemOp, etc.
10758multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10759                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10760  let ExeDomain = _.ExeDomain in {
10761  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10762                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
10763                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10764                   Sched<[sched]>;
  // The memory operand is a single scalar element (ScalarMemOp), and the
  // compressed-displacement tuple is CD8VH rather than a full-vector tuple.
10765  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10766                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10767                 (_.VT (_.BroadcastLdFrag addr:$src))>,
10768                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10769                 Sched<[sched.Folded]>;
10770  }
10771}
10772
// MOVDDUP expansion across the three vector lengths.
//   opc       - opcode byte, shared by every variant.
//   OpcodeStr - mnemonic.
//   OpNode    - selection node matched by the 512-bit and 256-bit forms.
//   sched     - per-width scheduling classes (ZMM / YMM / XMM members used).
//   VTInfo    - bundle providing the 512/256/128-bit type info.
// The 128-bit form does not use OpNode: it is delegated to
// avx512_movddup_128, which has its own patterns.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  // Use the OpNode argument rather than a hard-coded X86Movddup so the
  // parameter is no longer silently ignored. Existing instantiations pass
  // X86Movddup, so the generated records are unchanged.
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  // Narrower encodings additionally require VLX.
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10785
// Top-level MOVDDUP helper: instantiates avx512_movddup_common with the f64
// type bundle and attaches the XD prefix and VEX_W.
//   opc / OpcodeStr / OpNode / sched are forwarded unchanged.
10786multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10787                          X86SchedWriteWidths sched> {
10788  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10789                                        avx512vl_f64_info>, XD, VEX_W;
10790}
10791
// VMOVDDUP: opcode 0x12, X86Movddup node, FP-shuffle scheduling classes.
10792defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10793
// With VLX, a v2f64 broadcast of a scalar f64 is selected as the 128-bit
// VMOVDDUP register form. The scalar is first re-classed from FR64X to VR128X
// with COPY_TO_REGCLASS. Three variants: unmasked, merge-masked (rrk) and
// zero-masked (rrkz).
10794let Predicates = [HasVLX] in {
10795def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10796          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10797
// Merge masking: lanes with a clear mask bit keep $src0.
10798def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10799                        (v2f64 VR128X:$src0)),
10800          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10801                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
// Zero masking: the false operand of the select is the all-zeros vector.
10802def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10803                        immAllZerosV),
10804          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10805}
10806
10807//===----------------------------------------------------------------------===//
10808// AVX-512 - Unpack Instructions
10809//===----------------------------------------------------------------------===//
10810
// FP unpack high/low, built from the FP binary-op helper. The surrounding
// `let` overrides Uses to an empty register list and clears
// mayRaiseFPException for both expansions.
// NOTE(review): presumably avx512_fp_binop_p otherwise models an FP
// environment dependency that does not apply to pure data movement -- confirm
// against that multiclass.
10811let Uses = []<Register>, mayRaiseFPException = 0 in {
// VUNPCKH passes two extra trailing arguments (0, 1) that VUNPCKL leaves at
// their defaults; their meaning is defined by avx512_fp_binop_p.
10812defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
10813                                 SchedWriteFShuffleSizes, 0, 1>;
10814defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
10815                                 SchedWriteFShuffleSizes>;
10816}
10817
// Integer unpack low/high. Every variant matches X86Unpckl or X86Unpckh and
// uses the integer-shuffle scheduling classes.
// Byte and word element sizes are gated on HasBWI.
10818defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10819                                       SchedWriteShuffle, HasBWI>;
10820defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10821                                       SchedWriteShuffle, HasBWI>;
10822defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10823                                       SchedWriteShuffle, HasBWI>;
10824defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10825                                       SchedWriteShuffle, HasBWI>;
10826
// Dword and qword element sizes only need HasAVX512.
10827defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10828                                       SchedWriteShuffle, HasAVX512>;
10829defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10830                                       SchedWriteShuffle, HasAVX512>;
10831defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10832                                        SchedWriteShuffle, HasAVX512>;
10833defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10834                                        SchedWriteShuffle, HasAVX512>;
10835
10836//===----------------------------------------------------------------------===//
10837// AVX-512 - Extract & Insert Integer Instructions
10838//===----------------------------------------------------------------------===//
10839
// Store form shared by the byte and word element extracts.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - extract node; its result is truncated to the element type
//               before being stored.
//   _         - vector type info (RC, VT, EltVT, ScalarMemOp, EltSize).
10840multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10841                                                            X86VectorVTInfo _> {
10842  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10843              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10844              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10845              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
10846                       addr:$dst)]>,
10847              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10848}
10849
// Byte element extract (requires BWI).
//   OpcodeStr - mnemonic.
//   _         - vector type info for the source vector.
// `rr` writes the X86pextrb result to a GR32orGR64 operand; the store form
// comes from avx512_extract_elt_bw_m with the same opcode (0x14).
10850multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10851  let Predicates = [HasBWI] in {
10852    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10853                  (ins _.RC:$src1, u8imm:$src2),
10854                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10855                  [(set GR32orGR64:$dst,
10856                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
10857                  EVEX, TAPD, Sched<[WriteVecExtract]>;
10858
10859    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10860  }
10861}
10862
// Word element extract (requires BWI).
//   OpcodeStr - mnemonic.
//   _         - vector type info for the source vector.
// Defines three records:
//   rr     - opcode 0xC5, MRMSrcReg, PD prefix; carries the X86pextrw pattern.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD prefix; no pattern.
//   mr     - store form via avx512_extract_elt_bw_m, opcode 0x15, TAPD prefix.
10863multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10864  let Predicates = [HasBWI] in {
10865    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10866                  (ins _.RC:$src1, u8imm:$src2),
10867                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10868                  [(set GR32orGR64:$dst,
10869                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
10870                  EVEX, PD, Sched<[WriteVecExtract]>;
10871
    // Second register encoding: code-gen-only but still force-disassembled,
    // and never selected (empty pattern list). FoldGenData ties it to `rr`
    // (presumably for the memory-fold table generator -- confirm).
10872    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10873    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10874                   (ins _.RC:$src1, u8imm:$src2),
10875                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10876                   EVEX, TAPD, FoldGenData<NAME#rr>,
10877                   Sched<[WriteVecExtract]>;
10878
10879    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10880  }
10881}
10882
// Dword/qword element extract (requires DQI).
//   OpcodeStr - mnemonic.
//   _         - vector type info for the source vector.
//   GRC       - general-purpose register class receiving the element.
// Both forms use opcode 0x16 and match the generic `extractelt` node with a
// plain `imm` index (the byte/word helpers use `timm` instead).
10883multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10884                                                            RegisterClass GRC> {
10885  let Predicates = [HasDQI] in {
10886    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10887                  (ins _.RC:$src1, u8imm:$src2),
10888                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10889                  [(set GRC:$dst,
10890                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10891                  EVEX, TAPD, Sched<[WriteVecExtract]>;
10892
    // Store form: the extracted element is written straight to memory.
10893    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10894                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10895                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10896                [(store (extractelt (_.VT _.RC:$src1),
10897                                    imm:$src2),addr:$dst)]>,
10898                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10899                Sched<[WriteVecExtractSt]>;
10900  }
10901}
10902
// EVEX element extracts from 128-bit vectors. Byte and word forms are marked
// VEX_WIG; the qword form sets VEX_W and targets GR64, the dword form targets
// GR32.
10903defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10904defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10905defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10906defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10907
// Load form shared by all element inserts.
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic.
//   OpNode      - insert node matched by the pattern.
//   _           - vector type info (RC, VT, ScalarMemOp, EltSize).
//   LdFrag      - load fragment producing the scalar to insert.
//   immoperator - operator used to match the index immediate (callers in this
//                 file pass `timm` or `imm`).
10908multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10909                                            X86VectorVTInfo _, PatFrag LdFrag,
10910                                            SDPatternOperator immoperator> {
10911  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10912      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
10913      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10914      [(set _.RC:$dst,
10915          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
10916      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
10917}
10918
// Byte/word element insert (requires BWI).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - target insert node; the index is matched with `timm`.
//   _         - vector type info for the destination vector.
//   LdFrag    - load fragment used by the memory form.
// The register form takes the scalar from a GR32orGR64 operand.
10919multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10920                                            X86VectorVTInfo _, PatFrag LdFrag> {
10921  let Predicates = [HasBWI] in {
10922    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10923        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10924        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10925        [(set _.RC:$dst,
10926            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
10927        Sched<[WriteVecInsert]>;
10928
10929    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
10930  }
10931}
10932
// Dword/qword element insert (requires DQI).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info for the destination vector.
//   GRC       - general-purpose register class supplying the scalar.
// Both forms match the generic `insertelt` node with a plain `imm` index and
// carry the TAPD prefix; the memory form loads through `_.ScalarLdFrag`.
10933multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10934                                         X86VectorVTInfo _, RegisterClass GRC> {
10935  let Predicates = [HasDQI] in {
10936    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10937        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10938        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10939        [(set _.RC:$dst,
10940            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10941        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10942
10943    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10944                                    _.ScalarLdFrag, imm>, TAPD;
10945  }
10946}
10947
// EVEX element inserts into 128-bit vectors. The byte and word forms load
// through extending-load fragments (extloadi8 / extloadi16) and are VEX_WIG;
// the dword and qword forms share opcode 0x22 and differ by VEX_W and the GPR
// class.
10948defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
10949                                     extloadi8>, TAPD, VEX_WIG;
10950defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
10951                                     extloadi16>, PD, VEX_WIG;
10952defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10953defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10954
10955//===----------------------------------------------------------------------===//
10956// VSHUFPS - VSHUFPD Operations
10957//===----------------------------------------------------------------------===//
10958
// VSHUFPS / VSHUFPD helper: a three-operand-plus-imm8 instruction with opcode
// 0xC6 matching X86Shufp on the FP type bundle.
//   OpcodeStr - mnemonic.
//   VTInfo_I  - integer type bundle; not referenced anywhere in this body.
//   VTInfo_FP - FP type bundle used for the instruction and for the CD8
//               element size.
10959multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10960                        AVX512VLVectorVTInfo VTInfo_FP>{
10961  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10962                                    SchedWriteFShuffle>,
10963                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
10964                                    AVX512AIi8Base, EVEX_4V;
10965}
10966
// Single-precision form uses the PS prefix; double-precision uses PD + VEX_W.
10967defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
10968defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
10969
10970//===----------------------------------------------------------------------===//
10971// AVX-512 - Byte shift Left/Right
10972//===----------------------------------------------------------------------===//
10973
// Whole-register byte shift by an 8-bit immediate, for one vector width.
//   opc       - opcode byte (shared by both forms).
//   OpNode    - shift node matched by the patterns.
//   MRMr/MRMm - ModRM formats for the register and memory forms.
//   OpcodeStr - mnemonic.
//   sched     - scheduling class; the memory form uses its folded variants.
//   _         - vector type info (RC, VT, MemOp, LdFrag).
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // Register source.
  def ri : AVX512<opc, MRMr, (outs _.RC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
           Sched<[sched]>;

  // Memory source: the loaded value is bitconverted to the vector type
  // before being shifted.
  def mi : AVX512<opc, MRMm, (outs _.RC:$dst),
                  (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                      (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10990
// Expands avx512_shift_packed for all three vector lengths on byte vectors.
//   opc / OpNode / MRMr / MRMm / OpcodeStr are forwarded unchanged.
//   sched - per-width scheduling classes.
//   prd   - feature predicate; the 256/128-bit forms additionally need VLX.
10991multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
10992                                   Format MRMm, string OpcodeStr,
10993                                   X86SchedWriteWidths sched, Predicate prd>{
10994  let Predicates = [prd] in
10995    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10996                                 sched.ZMM, v64i8_info>, EVEX_V512;
10997  let Predicates = [prd, HasVLX] in {
10998    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10999                                    sched.YMM, v32i8x_info>, EVEX_V256;
11000    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11001                                    sched.XMM, v16i8x_info>, EVEX_V128;
11002  }
11003}
// Both byte shifts share opcode 0x73 and are told apart by the ModRM format:
// MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right shift.
11004defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11005                                       SchedWriteShuffle, HasBWI>,
11006                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11007defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11008                                       SchedWriteShuffle, HasBWI>,
11009                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11010
// PSADBW-style instruction for one vector width: the two sources use one
// vector type (_src) while the result uses another (_dst).
//   opc       - opcode byte.
//   OpNode    - node matched by the patterns.
//   OpcodeStr - mnemonic.
//   sched     - scheduling class; the memory form uses its folded variants.
//   _dst/_src - type info for the result and for the two source operands.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  // Only the register-register form is marked commutable.
  let isCommutable = 1 in {
    def rr : AVX512BI<opc, MRMSrcReg, (outs _dst.RC:$dst),
                      (ins _src.RC:$src1, _src.RC:$src2),
                      OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dst.RC:$dst,
                            (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                             (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  }

  // Second source folded from memory.
  def rm : AVX512BI<opc, MRMSrcMem, (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT (bitconvert
                                                     (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11031
// Expands avx512_psadbw_packed for all three vector lengths. At each width
// the sources are byte vectors and the result is an i64 vector of the same
// total size (v64i8 -> v8i64, v32i8 -> v4i64, v16i8 -> v2i64).
//   opc / OpNode / OpcodeStr are forwarded unchanged.
//   sched - per-width scheduling classes.
//   prd   - feature predicate; the 256/128-bit forms additionally need VLX.
11032multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11033                                    string OpcodeStr, X86SchedWriteWidths sched,
11034                                    Predicate prd> {
11035  let Predicates = [prd] in
11036    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11037                                  v8i64_info, v64i8_info>, EVEX_V512;
11038  let Predicates = [prd, HasVLX] in {
11039    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11040                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11041    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11042                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11043  }
11044}
11045
// VPSADBW: opcode 0xf6, X86psadbw node, gated on HasBWI.
11046defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11047                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11048
11049// Transforms to swizzle an immediate to enable better matching when
11050// memory operand isn't in the right place.
// Immediate transform for reversing the operand order (1,2,3) -> (3,2,1).
// Swapping table entries 1<->4 and 3<->6 exchanges the lowest and highest bit
// of each 3-bit table index (001<->100, 011<->110). Entries 0, 2, 5 and 7
// (mask 0xa5) have those two index bits equal and are kept as they are.
11051def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11052  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11053  uint8_t Imm = N->getZExtValue();
11054  // Swap bits 1/4 and 3/6.
11055  uint8_t NewImm = Imm & 0xa5;
11056  if (Imm & 0x02) NewImm |= 0x10;
11057  if (Imm & 0x10) NewImm |= 0x02;
11058  if (Imm & 0x08) NewImm |= 0x40;
11059  if (Imm & 0x40) NewImm |= 0x08;
11060  return getI8Imm(NewImm, SDLoc(N));
11061}]>;
// Immediate transform for exchanging the first two operands,
// (1,2,3) -> (2,1,3). Swapping table entries 2<->4 and 3<->5 exchanges the two
// upper bits of each 3-bit table index (010<->100, 011<->101). Entries 0, 1,
// 6 and 7 (mask 0xc3) have those two index bits equal and are kept.
11062def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11063  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11064  uint8_t Imm = N->getZExtValue();
11065  // Swap bits 2/4 and 3/5.
11066  uint8_t NewImm = Imm & 0xc3;
11067  if (Imm & 0x04) NewImm |= 0x10;
11068  if (Imm & 0x10) NewImm |= 0x04;
11069  if (Imm & 0x08) NewImm |= 0x20;
11070  if (Imm & 0x20) NewImm |= 0x08;
11071  return getI8Imm(NewImm, SDLoc(N));
11072}]>;
// Immediate transform for exchanging the last two operands,
// (1,2,3) -> (1,3,2). Swapping table entries 1<->2 and 5<->6 exchanges the two
// lower bits of each 3-bit table index (001<->010, 101<->110). Entries 0, 3,
// 4 and 7 (mask 0x99) have those two index bits equal and are kept.
11073def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11074  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11075  uint8_t Imm = N->getZExtValue();
11076  // Swap bits 1/2 and 5/6.
11077  uint8_t NewImm = Imm & 0x99;
11078  if (Imm & 0x02) NewImm |= 0x04;
11079  if (Imm & 0x04) NewImm |= 0x02;
11080  if (Imm & 0x20) NewImm |= 0x40;
11081  if (Imm & 0x40) NewImm |= 0x20;
11082  return getI8Imm(NewImm, SDLoc(N));
11083}]>;
// Immediate transform for the rotation (1,2,3) -> (2,3,1): given a table that
// computes f(a, b, c), the result is the table that computes the same value
// when the operands are supplied in the order (b, c, a). Each table entry
// moves to the index obtained by rotating its 3-bit index left by one
// (001->010, 010->100, 100->001); entries 0 and 7 (mask 0x81) are fixed
// points. This is the inverse of the 312 rotation, not an involution.
11084def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11085  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11086  uint8_t Imm = N->getZExtValue();
11087  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11088  uint8_t NewImm = Imm & 0x81;
11089  if (Imm & 0x02) NewImm |= 0x04;
11090  if (Imm & 0x04) NewImm |= 0x10;
11091  if (Imm & 0x08) NewImm |= 0x40;
11092  if (Imm & 0x10) NewImm |= 0x02;
11093  if (Imm & 0x20) NewImm |= 0x08;
11094  if (Imm & 0x40) NewImm |= 0x20;
11095  return getI8Imm(NewImm, SDLoc(N));
11096}]>;
// Immediate transform for the rotation (1,2,3) -> (3,1,2): given a table that
// computes f(a, b, c), the result is the table that computes the same value
// when the operands are supplied in the order (c, a, b). Each table entry
// moves to the index obtained by rotating its 3-bit index right by one
// (001->100, 010->001, 100->010); entries 0 and 7 (mask 0x81) are fixed
// points. This is the inverse of the 231 rotation, not an involution.
11097def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11098  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11099  uint8_t Imm = N->getZExtValue();
11100  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11101  uint8_t NewImm = Imm & 0x81;
11102  if (Imm & 0x02) NewImm |= 0x10;
11103  if (Imm & 0x04) NewImm |= 0x02;
11104  if (Imm & 0x08) NewImm |= 0x20;
11105  if (Imm & 0x10) NewImm |= 0x04;
11106  if (Imm & 0x20) NewImm |= 0x40;
11107  if (Imm & 0x40) NewImm |= 0x08;
11108  return getI8Imm(NewImm, SDLoc(N));
11109}]>;
11110
// VPTERNLOG for one vector width.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - ternary-logic node: three vector operands plus an i8 truth
//               table.
//   sched     - scheduling class; memory forms use its folded variants.
//   _         - vector type info.
//   Name      - base record name used to look up the masked instruction
//               variants (Name # _.ZSuffix # "rrik", "rmik", ...).
//
// Instruction operand roles: $src1 is tied to $dst, $src2 is a register and
// $src3 is the register / load / broadcast-load operand. The extra patterns
// below handle nodes whose operands arrive in a different order by permuting
// the operands and rewriting the truth-table immediate to compensate.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 timm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (_.BroadcastLdFrag addr:$src3)),
                            (i8 timm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // The next two node orders are rotations of the instruction order, and a
  // rotation is not its own inverse. An XForm named XYZ turns a table written
  // for operand order (1,2,3) into one for order (X,Y,Z); here the node is in
  // the rotated order and the instruction is in order (1,2,3), so the
  // *inverse* rotation is required. Check with imm 0xF0 ("result = first
  // node operand"): for the node (src2, mem, src1) the instruction must
  // select its second operand (0xCC), which is what the 312 transform yields;
  // the 231 transform would yield 0xAA and select the memory operand.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Rotated orders again: use the inverse rotation, exactly as for the plain
  // load patterns.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
}
11256
// Expands avx512_ternlog (opcode 0x25, X86vpternlog node) for all three vector
// lengths.
//   OpcodeStr - mnemonic.
//   sched     - per-width scheduling classes.
//   _         - bundle providing the 512/256/128-bit type info.
// NAME is passed down so the per-width multiclass can look up the masked
// instruction records by name. The 128/256-bit forms additionally need VLX.
11257multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11258                                 AVX512VLVectorVTInfo _> {
11259  let Predicates = [HasAVX512] in
11260    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11261                               _.info512, NAME>, EVEX_V512;
11262  let Predicates = [HasAVX512, HasVLX] in {
11263    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11264                               _.info128, NAME>, EVEX_V128;
11265    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11266                               _.info256, NAME>, EVEX_V256;
11267  }
11268}
11269
// Dword and qword element forms; the qword form additionally sets VEX_W.
11270defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11271                                        avx512vl_i32_info>;
11272defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11273                                        avx512vl_i64_info>, VEX_W;
11274
11275// Patterns to implement vnot using vpternlog instead of creating all ones
11276// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11277// so that the result is only dependent on src0. But we use the same source
11278// for all operands to prevent a false dependency.
11279// TODO: We should maybe have a more generalized algorithm for folding to
11280// vpternlog.
let Predicates = [HasAVX512] in {
  // Bitwise NOT of any 512-bit integer vector type is selected as the qword
  // VPTERNLOG with immediate 15, using the same register for all three
  // sources.
  foreach VT = [v64i8, v32i16, v16i32, v8i64] in
    def : Pat<(VT (vnot VR512:$src)),
              (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
11291
let Predicates = [HasAVX512, NoVLX] in {
  // Without VLX there is no narrow VPTERNLOG, so the source is placed in the
  // low part of an otherwise undefined 512-bit register, the 512-bit qword
  // form is applied (immediate 15, same register for all three sources), and
  // the low part of the result is extracted again.

  // 128-bit integer vector types.
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (vnot VR128X:$src)),
              (EXTRACT_SUBREG
               (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (i8 15)), sub_xmm)>;

  // 256-bit integer vector types.
  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (vnot VR256X:$src)),
              (EXTRACT_SUBREG
               (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (i8 15)), sub_ymm)>;
}
11351
// When HasVLX holds, a vector NOT of any 128-bit or 256-bit integer type is
// selected directly as the matching-width VPTERNLOGQ with all three sources
// set to $src and immediate 15. With identical sources only truth-table bits
// 0 and 7 are reachable; 15 has bit 0 set and bit 7 clear, i.e. bitwise NOT.
let Predicates = [HasVLX] in {
  // 128-bit integer vector types.
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (vnot VR128X:$src)),
              (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src,
               (i8 15))>;

  // 256-bit integer vector types.
  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (vnot VR256X:$src)),
              (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src,
               (i8 15))>;
}
11371
11372//===----------------------------------------------------------------------===//
11373// AVX-512 - FixupImm
11374//===----------------------------------------------------------------------===//
11375
// Packed VFIXUPIMM forms for a single vector width.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - type info for $dst, $src1 and $src2 (and the register class
//               of $src3).
//   TblVT     - type info supplying the value type and load fragments used
//               for $src3.
// Every form is built with AVX512_maskable_3src, ties $src1 to $dst, lists
// MXCSR in Uses and sets mayRaiseFPException.
11376multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11377                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11378                                  X86VectorVTInfo TblVT>{
11379  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11380      Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Register form: $src3 is a register viewed as TblVT.VT.
11381    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11382                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11383                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11384                        (X86VFixupimm (_.VT _.RC:$src1),
11385                                      (_.VT _.RC:$src2),
11386                                      (TblVT.VT _.RC:$src3),
11387                                      (i32 timm:$src4))>, Sched<[sched]>;
    // Memory form: $src3 is loaded with TblVT.LdFrag and bitconverted.
11388    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11389                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11390                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11391                      (X86VFixupimm (_.VT _.RC:$src1),
11392                                    (_.VT _.RC:$src2),
11393                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11394                                    (i32 timm:$src4))>,
11395                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast form: $src3 is a scalar memory operand loaded with
    // TblVT.BroadcastLdFrag; the asm string appends _.BroadcastStr and the
    // encoding sets EVEX_B.
11396    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11397                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11398                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11399                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11400                      (X86VFixupimm (_.VT _.RC:$src1),
11401                                    (_.VT _.RC:$src2),
11402                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11403                                    (i32 timm:$src4))>,
11404                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11405  } // Constraints = "$src1 = $dst"
11406}
11407
// Everything avx512_fixupimm_packed defines (inherited with the same
// arguments) plus rrib: a register form that matches X86VFixupimmSAE and
// prints "{sae}" in its asm string. rrib sets EVEX_B and lists MXCSR in Uses,
// but its `let` does not set mayRaiseFPException as the inherited forms do.
11408multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11409                                      X86FoldableSchedWrite sched,
11410                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
11411  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11412let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11413  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11414                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11415                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11416                      "$src2, $src3, {sae}, $src4",
11417                      (X86VFixupimmSAE (_.VT _.RC:$src1),
11418                                       (_.VT _.RC:$src2),
11419                                       (TblVT.VT _.RC:$src3),
11420                                       (i32 timm:$src4))>,
11421                      EVEX_B, Sched<[sched]>;
11422  }
11423}
11424
// Scalar VFIXUPIMM forms, all under HasAVX512 with $src1 tied to $dst.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   sched     - scheduling class; only the memory form uses sched.Folded and
//               sched.ReadAfterFold.
//   _         - type info for $dst, $src1 and $src2.
//   _src3VT   - type info giving the value type, register class and scalar
//               load fragment used for $src3 in the patterns.
// Forms emitted (each via AVX512_maskable_3src_scalar):
//   rri  - register form, matches X86VFixupimms, tagged SIMD_EXC.
//   rrib - register form printing "{sae}", matches X86VFixupimmSAEs, sets
//          EVEX_B and lists MXCSR in Uses (not tagged SIMD_EXC).
//   rmi  - memory form; $src3 is a scalar load wrapped in scalar_to_vector,
//          tagged SIMD_EXC.
11425multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11426                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11427                                  X86VectorVTInfo _src3VT> {
11428  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11429      ExeDomain = _.ExeDomain in {
11430    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11431                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11432                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11433                      (X86VFixupimms (_.VT _.RC:$src1),
11434                                     (_.VT _.RC:$src2),
11435                                     (_src3VT.VT _src3VT.RC:$src3),
11436                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // rrib takes only register operands (MRMSrcReg), so it is scheduled with
    // the plain `sched` class like rri, not with the load-folded
    // sched.Folded/sched.ReadAfterFold pair that belongs to the memory form.
11437    let Uses = [MXCSR] in
11438    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11439                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11440                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11441                      "$src2, $src3, {sae}, $src4",
11442                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
11443                                        (_.VT _.RC:$src2),
11444                                        (_src3VT.VT _src3VT.RC:$src3),
11445                                        (i32 timm:$src4))>,
11446                      EVEX_B, Sched<[sched]>;
11447    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11448                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11449                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11450                     (X86VFixupimms (_.VT _.RC:$src1),
11451                                    (_.VT _.RC:$src2),
11452                                    (_src3VT.VT (scalar_to_vector
11453                                              (_src3VT.ScalarLdFrag addr:$src3))),
11454                                    (i32 timm:$src4))>,
11455                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11456  }
11457}
11458
// Instantiates the packed "vfixupimm" forms (opcode 0x54) at all three
// vector widths.
//   sched - per-width scheduling classes (.XMM/.YMM/.ZMM are selected here).
//   _Vec  - per-width type info for the data operands.
//   _Tbl  - per-width type info for the third source operand.
// Z (512-bit) also gets the SAE form and requires HasAVX512; Z128 and Z256
// require HasAVX512 and HasVLX.
11459multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11460                                      AVX512VLVectorVTInfo _Vec,
11461                                      AVX512VLVectorVTInfo _Tbl> {
11462  let Predicates = [HasAVX512] in
11463    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11464                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11465                                EVEX_4V, EVEX_V512;
11466  let Predicates = [HasAVX512, HasVLX] in {
11467    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11468                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11469                            EVEX_4V, EVEX_V128;
11470    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11471                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11472                            EVEX_4V, EVEX_V256;
11473  }
11474}
11475
// VFIXUPIMM instantiations. The scalar SS/SD records use opcode 0x55 and pair
// the f32/f64 scalar info with the v4i32/v2i64 info for the third source; the
// packed PS/PD records pair the f32/f64 vector infos with the i32/i64 ones.
// The 64-bit element variants (SD, PD) additionally set VEX_W.
11476defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11477                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11478                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11479defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11480                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11481                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11482defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11483                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11484defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11485                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11486
11487// Patterns used to select SSE scalar fp arithmetic instructions from
11488// either:
11489//
11490// (1) a scalar fp operation followed by a blend
11491//
11492// The effect is that the backend no longer emits unnecessary vector
11493// insert instructions immediately after SSE scalar fp instructions
11494// like addss or mulss.
11495//
11496// For example, given the following code:
11497//   __m128 foo(__m128 A, __m128 B) {
11498//     A[0] += B[0];
11499//     return A;
11500//   }
11501//
11502// Previously we generated:
11503//   addss %xmm0, %xmm1
11504//   movss %xmm1, %xmm0
11505//
11506// We now generate:
11507//   addss %xmm1, %xmm0
11508//
11509// (2) a vector packed single/double fp operation followed by a vector insert
11510//
11511// The effect is that the backend converts the packed fp instruction
11512// followed by a vector insert into a single SSE scalar fp instruction.
11513//
11514// For example, given the following code:
11515//   __m128 foo(__m128 A, __m128 B) {
11516//     __m128 C = A + B;
11517//     return (__m128) {C[0], A[1], A[2], A[3]};
11518//   }
11519//
11520// Previously we generated:
11521//   addps %xmm0, %xmm1
11522//   movss %xmm1, %xmm0
11523//
11524// We now generate:
11525//   addss %xmm1, %xmm0
11526
11527// TODO: Some canonicalization in lowering would simplify the number of
11528// patterns we have to try to match.
// Selection patterns that fold "scalar FP op on element 0, then re-insert
// with MoveNode" into a single V<OpcPrefix>Z*_Int instruction.
//   Op        - operator matched by the unmasked patterns.
//   MaskedOp  - node matched inside X86selects_mask by the masked patterns.
//   OpcPrefix - instruction name stem; "V" is prepended and "Zrr_Int",
//               "Zrm_Int", "Zrr_Intk", ... appended.
//   MoveNode  - the low-element move node wrapping the result.
//   _         - vector type info (VT, EltVT, FRC, ScalarLdFrag are used).
//   ZeroFP    - leaf matching the zero passthru in the zero-masking patterns.
// Each case comes as a register (rr) and a scalar-load (rm) pattern.
11529multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
11530                                          string OpcPrefix, SDNode MoveNode,
11531                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
11532  let Predicates = [HasAVX512] in {
11533    // extracted scalar math op with insert via movss
11534    def : Pat<(MoveNode
11535               (_.VT VR128X:$dst),
11536               (_.VT (scalar_to_vector
11537                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11538                          _.FRC:$src)))),
11539              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
11540               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11541    def : Pat<(MoveNode
11542               (_.VT VR128X:$dst),
11543               (_.VT (scalar_to_vector
11544                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11545                          (_.ScalarLdFrag addr:$src))))),
11546              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
11547
11548    // extracted masked scalar math op with insert via movss
    // (merge-masking: $src0 is the passthru value, selected as _Intk)
11549    def : Pat<(MoveNode (_.VT VR128X:$src1),
11550               (scalar_to_vector
11551                (X86selects_mask VK1WM:$mask,
11552                            (MaskedOp (_.EltVT
11553                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11554                                      _.FRC:$src2),
11555                            _.FRC:$src0))),
11556              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
11557               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11558               VK1WM:$mask, _.VT:$src1,
11559               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11560    def : Pat<(MoveNode (_.VT VR128X:$src1),
11561               (scalar_to_vector
11562                (X86selects_mask VK1WM:$mask,
11563                            (MaskedOp (_.EltVT
11564                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11565                                      (_.ScalarLdFrag addr:$src2)),
11566                            _.FRC:$src0))),
11567              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
11568               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11569               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11570
11571    // extracted zero-masked scalar math op with insert via movss
    // (zero-masking: the passthru is ZeroFP, selected as _Intkz)
11572    def : Pat<(MoveNode (_.VT VR128X:$src1),
11573               (scalar_to_vector
11574                (X86selects_mask VK1WM:$mask,
11575                            (MaskedOp (_.EltVT
11576                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11577                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
11578      (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
11579          VK1WM:$mask, _.VT:$src1,
11580          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11581    def : Pat<(MoveNode (_.VT VR128X:$src1),
11582               (scalar_to_vector
11583                (X86selects_mask VK1WM:$mask,
11584                            (MaskedOp (_.EltVT
11585                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11586                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11587      (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11588  }
11589}
11590
// Scalar add/sub/mul/div folding patterns. Each line passes the any_* operator
// as Op (unmasked patterns) and the plain node as MaskedOp (masked patterns).
// The SS set uses X86Movss / v4f32x_info / fp32imm0, the SD set uses
// X86Movsd / v2f64x_info / fp64imm0.
11591defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11592defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11593defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11594defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11595
11596defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11597defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11598defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11599defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11600
// Under HasAVX512, selects V<OpcPrefix>Zr_Int for a Move that inserts OpNode
// applied to element 0 of $src into $dst.
//   OpNode    - the unary scalar operator to match.
//   OpcPrefix - instruction name stem ("V" is prepended, "Zr_Int" appended).
//   Move      - the low-element move node wrapping the result.
//   _         - vector type info; only _.VT is used.
11601multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
11602                                             SDNode Move, X86VectorVTInfo _> {
11603  let Predicates = [HasAVX512] in {
11604    def : Pat<(_.VT (Move _.VT:$dst,
11605                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11606              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
11607  }
11608}
11609
// Scalar square-root folding patterns for the f32 (SQRTSS) and f64 (SQRTSD)
// cases, both matching any_fsqrt.
11610defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11611defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11612
11613//===----------------------------------------------------------------------===//
11614// AES instructions
11615//===----------------------------------------------------------------------===//
11616
// One VAES instruction at 128, 256 and 512 bits, each built from
// AESI_binop_rm_int with EVEX_4V encoding.
//   Op        - opcode byte.
//   OpStr     - mnemonic.
//   IntPrefix - name of the intrinsic used for the 128-bit form; "_256" and
//               "_512" are appended for the wider forms.
// Z128 and Z256 require HasVLX and HasVAES; Z requires HasAVX512 and HasVAES.
11617multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11618  let Predicates = [HasVLX, HasVAES] in {
11619    defm Z128 : AESI_binop_rm_int<Op, OpStr,
11620                                  !cast<Intrinsic>(IntPrefix),
11621                                  loadv2i64, 0, VR128X, i128mem>,
11622                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11623    defm Z256 : AESI_binop_rm_int<Op, OpStr,
11624                                  !cast<Intrinsic>(IntPrefix#"_256"),
11625                                  loadv4i64, 0, VR256X, i256mem>,
11626                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11627    }
11628    let Predicates = [HasAVX512, HasVAES] in
11629    defm Z    : AESI_binop_rm_int<Op, OpStr,
11630                                  !cast<Intrinsic>(IntPrefix#"_512"),
11631                                  loadv8i64, 0, VR512, i512mem>,
11632                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
11633}
11634
// The four VAES operations, opcodes 0xDC-0xDF, each tied to the corresponding
// int_x86_aesni_* intrinsic family.
11635defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11636defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11637defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11638defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11639
11640//===----------------------------------------------------------------------===//
11641// PCLMUL instructions - Carry less multiplication
11642//===----------------------------------------------------------------------===//
11643
// EVEX_4V-encoded VPCLMULQDQ at 512 bits (HasAVX512 + HasVPCLMULQDQ) and at
// 128/256 bits (HasVLX + HasVPCLMULQDQ), each built from the vpclmulqdq
// multiclass with the width-matching register class, memory operand, load
// fragment and intrinsic.
11644let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11645defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11646                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11647
11648let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11649defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11650                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11651
11652defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11653                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11654                                EVEX_CD8<64, CD8VF>, VEX_WIG;
11655}
11656
11657// Aliases
// One vpclmulqdq_aliases instantiation per instruction defined above, keyed
// by the instruction's record-name prefix.
11658defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11659defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11660defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11661
11662//===----------------------------------------------------------------------===//
11663// VBMI2
11664//===----------------------------------------------------------------------===//
11665
// Register (r) and full-vector memory (m) forms of a three-source VBMI2
// variable shift, with $src1 tied to $dst.
//   Op     - opcode byte.
//   OpStr  - mnemonic.
//   OpNode - node matched as (OpNode $src1, $src2, $src3).
//   sched  - scheduling class; the memory form uses the folded variants.
//   VTI    - vector type info for all operands.
11666multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11667                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11668  let Constraints = "$src1 = $dst",
11669      ExeDomain   = VTI.ExeDomain in {
11670    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11671                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11672                "$src3, $src2", "$src2, $src3",
11673                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11674                AVX512FMA3Base, Sched<[sched]>;
11675    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11676                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11677                "$src3, $src2", "$src2, $src3",
11678                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11679                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
11680                AVX512FMA3Base,
11681                Sched<[sched.Folded, sched.ReadAfterFold]>;
11682  }
11683}
11684
// VBMI2_shift_var_rm (inherited with the same arguments) plus mb, a
// broadcast-memory form: $src3 is a scalar memory operand loaded with
// VTI.BroadcastLdFrag, the asm string appends VTI.BroadcastStr, and the
// encoding sets EVEX_B.
11685multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11686                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11687         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11688  let Constraints = "$src1 = $dst",
11689      ExeDomain   = VTI.ExeDomain in
11690  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11691              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11692              "${src3}"#VTI.BroadcastStr#", $src2",
11693              "$src2, ${src3}"#VTI.BroadcastStr,
11694              (OpNode VTI.RC:$src1, VTI.RC:$src2,
11695               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11696              AVX512FMA3Base, EVEX_B,
11697              Sched<[sched.Folded, sched.ReadAfterFold]>;
11698}
11699
// Instantiates VBMI2_shift_var_rm (no broadcast form) at all three widths:
// Z under HasVBMI2, Z256 and Z128 under HasVBMI2 and HasVLX. `sched` and `VTI`
// are per-width bundles from which the matching member is selected.
11700multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11701                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11702  let Predicates = [HasVBMI2] in
11703  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11704                                   EVEX_V512;
11705  let Predicates = [HasVBMI2, HasVLX] in {
11706    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11707                                   EVEX_V256;
11708    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11709                                   EVEX_V128;
11710  }
11711}
11712
// Same as VBMI2_shift_var_rm_common but built on VBMI2_shift_var_rmb, so each
// width also gets the broadcast-memory form. Z requires HasVBMI2; Z256 and
// Z128 require HasVBMI2 and HasVLX.
11713multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11714                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11715  let Predicates = [HasVBMI2] in
11716  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11717                                    EVEX_V512;
11718  let Predicates = [HasVBMI2, HasVLX] in {
11719    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11720                                    EVEX_V256;
11721    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11722                                    EVEX_V128;
11723  }
11724}
// Word, dword and qword variants of a VBMI2 variable shift.
//   wOp    - opcode of the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants; Q adds
//            VEX_W.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended.
// W is built without a broadcast form (rm_common); D and Q include one
// (rmb_common).
11725multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11726                           SDNode OpNode, X86SchedWriteWidths sched> {
11727  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
11728             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11729  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
11730             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11731  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
11732             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
11733}
11734
// Word, dword and qword variants of a VBMI2 immediate-count shift, all gated
// on HasVBMI2.
//   wOp    - opcode of the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants; Q adds
//            VEX_W.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended.
// W is built from avx512_common_3Op_rm_imm8, D and Q from
// avx512_common_3Op_imm8 (note the different argument order of the two).
11735multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11736                           SDNode OpNode, X86SchedWriteWidths sched> {
11737  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
11738             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11739             VEX_W, EVEX_CD8<16, CD8VF>;
11740  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
11741             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11742  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
11743             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11744}
11745
11746// Concat & Shift
// Variable-count (VPSHLDV/VPSHRDV) and immediate-count (VPSHLD/VPSHRD)
// families. The left and right shifts use word opcodes 0x70/0x72 and
// dword/qword opcodes 0x71/0x73 respectively.
11747defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11748defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11749defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11750defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
11751
11752// Compress
// Byte and word compress (opcode 0x63, gated on HasVBMI2); the word variant
// adds VEX_W. Both are tagged NotMemoryFoldable.
11753defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11754                                         avx512vl_i8_info, HasVBMI2>, EVEX,
11755                                         NotMemoryFoldable;
11756defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11757                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11758                                          NotMemoryFoldable;
11759// Expand
// Byte and word expand (opcode 0x62, gated on HasVBMI2); the word variant
// adds VEX_W.
11760defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11761                                      avx512vl_i8_info, HasVBMI2>, EVEX;
11762defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11763                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11764
11765//===----------------------------------------------------------------------===//
11766// VNNI
11767//===----------------------------------------------------------------------===//
11768
// Register (r), full-vector memory (m) and broadcast-memory (mb) forms of a
// three-source VNNI instruction, with $src1 tied to $dst.
//   Op           - opcode byte.
//   OpStr        - mnemonic.
//   OpNode       - node matched as (OpNode $src1, $src2, $src3).
//   sched        - scheduling class; memory forms use the folded variants.
//   VTI          - vector type info for all operands.
//   IsCommutable - forwarded (twice) as the trailing template arguments of
//                  the register form only; the memory forms leave them at
//                  their defaults.
11769let Constraints = "$src1 = $dst" in
11770multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11771                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11772                    bit IsCommutable> {
11773  let ExeDomain = VTI.ExeDomain in {
11774  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11775                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11776                                   "$src3, $src2", "$src2, $src3",
11777                                   (VTI.VT (OpNode VTI.RC:$src1,
11778                                            VTI.RC:$src2, VTI.RC:$src3)),
11779                                   IsCommutable, IsCommutable>,
11780                                   EVEX_4V, T8PD, Sched<[sched]>;
11781  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11782                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11783                                   "$src3, $src2", "$src2, $src3",
11784                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11785                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
11786                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11787                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11788  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11789                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11790                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
11791                                   "$src2, ${src3}"#VTI.BroadcastStr,
11792                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
11793                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11794                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11795                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
11796  }
11797}
11798
// Instantiates VNNI_rmb on 32-bit-element integer vectors at all three
// widths: Z (v16i32) under HasVNNI, Z256 (v8i32) and Z128 (v4i32) under
// HasVNNI and HasVLX.
11799multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11800                       X86SchedWriteWidths sched, bit IsCommutable> {
11801  let Predicates = [HasVNNI] in
11802  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
11803                           IsCommutable>, EVEX_V512;
11804  let Predicates = [HasVNNI, HasVLX] in {
11805    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
11806                           IsCommutable>, EVEX_V256;
11807    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
11808                           IsCommutable>, EVEX_V128;
11809  }
11810}
11811
11812// FIXME: Is there a better scheduler class for VPDP?
// The four VNNI dot-product instructions (opcodes 0x50-0x53). The last
// argument is IsCommutable: 0 for the VPDPBUSD* pair, 1 for the VPDPWSSD*
// pair.
11813defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
11814defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
11815defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
11816defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
11817
11818// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Each pattern selects (add $src1, (X86vpmaddwd_su $src2, $src3)) as a single
// VPDPWSSD with $src1 as the accumulator, in a register and a load-folding
// variant per width. The 512-bit patterns require HasVNNI, the 256/128-bit
// ones HasVNNI and HasVLX.
// NOTE(review): X86vpmaddwd_su is defined outside this section; presumably it
// is the single-use-restricted form of X86vpmaddwd -- confirm.
11819let Predicates = [HasVNNI] in {
11820  def : Pat<(v16i32 (add VR512:$src1,
11821                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
11822            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
11823  def : Pat<(v16i32 (add VR512:$src1,
11824                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
11825            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
11826}
11827let Predicates = [HasVNNI,HasVLX] in {
11828  def : Pat<(v8i32 (add VR256X:$src1,
11829                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
11830            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
11831  def : Pat<(v8i32 (add VR256X:$src1,
11832                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
11833            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
11834  def : Pat<(v4i32 (add VR128X:$src1,
11835                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
11836            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
11837  def : Pat<(v4i32 (add VR128X:$src1,
11838                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
11839            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
11840}
11841
11842//===----------------------------------------------------------------------===//
11843// Bit Algorithms
11844//===----------------------------------------------------------------------===//
11845
11846// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count, both opcode 0x54 matching ctpop and gated
// on HasBITALG; the word variant adds VEX_W.
11847defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11848                                   avx512vl_i8_info, HasBITALG>;
11849defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11850                                   avx512vl_i16_info, HasBITALG>, VEX_W;
11851
// Additional ctpop selection patterns produced by avx512_unary_lowering for
// the two instructions above.
11852defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11853defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11854
// X86Vpshufbitqmb restricted to nodes that have exactly one use
// (N->hasOneUse()).
11855def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
11856                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
11857  return N->hasOneUse();
11858}]>;
11859
// Register (rr) and memory (rm) forms of "vpshufbitqmb" (opcode 0x8F), which
// writes a mask register (VTI.KRC). Each form hands AVX512_maskable_cmp two
// patterns: one on X86Vpshufbitqmb and one on the single-use
// X86Vpshufbitqmb_su.
// NOTE(review): presumably the _su pattern is the one used for the masked
// variant -- confirm against AVX512_maskable_cmp.
//   sched - scheduling class; the memory form uses the folded variants.
//   VTI   - vector type info for both sources.
11860multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11861  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11862                                (ins VTI.RC:$src1, VTI.RC:$src2),
11863                                "vpshufbitqmb",
11864                                "$src2, $src1", "$src1, $src2",
11865                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11866                                (VTI.VT VTI.RC:$src2)),
11867                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11868                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11869                                Sched<[sched]>;
11870  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11871                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
11872                                "vpshufbitqmb",
11873                                "$src2, $src1", "$src1, $src2",
11874                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11875                                (VTI.VT (VTI.LdFrag addr:$src2))),
11876                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11877                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
11878                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11879                                Sched<[sched.Folded, sched.ReadAfterFold]>;
11880}
11881
// Instantiates VPSHUFBITQMB_rm at all three widths: Z under HasBITALG, Z256
// and Z128 under HasBITALG and HasVLX.
11882multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11883  let Predicates = [HasBITALG] in
11884  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11885  let Predicates = [HasBITALG, HasVLX] in {
11886    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11887    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
11888  }
11889}
11890
11891// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// VPSHUFBITQMB on byte-element vectors at all three widths.
11892defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11893
11894//===----------------------------------------------------------------------===//
11895// GFNI
11896//===----------------------------------------------------------------------===//
11897
// Instantiates avx512_binop_rm for a byte-vector binary operation at all
// three vector widths.
//   Op     - 8-bit opcode forwarded to avx512_binop_rm.
//   OpStr  - assembly mnemonic.
//   OpNode - SDNode forwarded to avx512_binop_rm.
//   sched  - per-width scheduling info (.ZMM/.YMM/.XMM are forwarded).
// Z is guarded by HasGFNI + HasAVX512 + HasBWI; Z256/Z128 are guarded by
// HasGFNI + HasVLX + HasBWI.
// NOTE(review): the trailing `1` argument is presumably avx512_binop_rm's
// IsCommutable flag - confirm against that multiclass's definition.
11898multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11899                                   X86SchedWriteWidths sched> {
11900  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11901  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
11902                                EVEX_V512;
11903  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
11904    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
11905                                EVEX_V256;
11906    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
11907                                EVEX_V128;
11908  }
11909}
11910
// EVEX-encoded vgf2p8mulb: opcode 0xCF selecting X86GF2P8mulb, scheduled with
// SchedWriteVecALU, in the T8PD opcode map with EVEX_CD8<8, CD8VF> applied to
// every generated record.
11911defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
11912                                          SchedWriteVecALU>,
11913                                          EVEX_CD8<8, CD8VF>, T8PD;
11914
// Three-operand-plus-imm8 GF2P8AFFINE forms for one vector width.
// Inherits whatever avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI>
// defines and adds `rmbi`, a maskable form whose second source is a
// broadcast load.
//   VTI     - type info of the operation itself (register class, VT, domain).
//   BcstVTI - type info used only for the broadcast: it supplies the
//             BroadcastStr printed after ${src2} and the VT that the
//             X86VBroadcastld64 result has before being bitconverted for
//             OpNode.
11915multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
11916                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11917                                      X86VectorVTInfo BcstVTI>
11918           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
11919  let ExeDomain = VTI.ExeDomain in
       // The memory operand class comes from VTI (VTI.ScalarMemOp) while the
       // broadcast decoration and broadcast VT come from BcstVTI.
11920  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11921                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
11922                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
11923                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
11924                (OpNode (VTI.VT VTI.RC:$src1),
11925                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
11926                 (i8 timm:$src3))>, EVEX_B,
11927                 Sched<[sched.Folded, sched.ReadAfterFold]>;
11928}
11929
// Instantiates GF2P8AFFINE_avx512_rmb_imm at 512/256/128 bits.
// Each width pairs a byte-element type info (operation type) with the
// 64-bit-element type info of the same total width (broadcast type):
// v64i8/v8i64, v32i8x/v4i64x, v16i8x/v2i64x.
// Z is guarded by HasGFNI + HasAVX512 + HasBWI; Z256/Z128 are guarded by
// HasGFNI + HasVLX + HasBWI.
11930multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11931                                     X86SchedWriteWidths sched> {
11932  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11933  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
11934                                           v64i8_info, v8i64_info>, EVEX_V512;
11935  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
11936    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
11937                                           v32i8x_info, v4i64x_info>, EVEX_V256;
11938    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
11939                                           v16i8x_info, v2i64x_info>, EVEX_V128;
11940  }
11941}
11942
// EVEX-encoded GF(2^8) affine instructions. The two families differ only in
// opcode (0xCF vs 0xCE), mnemonic and SDNode; both use SchedWriteVecIMul and
// the same encoding classes (EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W,
// AVX512AIi8Base).
11943defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
11944                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
11945                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
11946defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
11947                         X86GF2P8affineqb, SchedWriteVecIMul>,
11948                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
11949
11950
11951//===----------------------------------------------------------------------===//
11952// AVX5124FMAPS
11953//===----------------------------------------------------------------------===//
11954
// AVX5124FMAPS memory-form definitions. All four are created through
// AVX512_maskable_3src_in_asm with an empty pattern list ([]), so no
// selection pattern is attached here. The shared `let` marks them as
// side-effect free loads in the SSEPackedSingle domain, ties $src1 to $dst,
// and records that they use MXCSR and may raise FP exceptions.
// The packed (PS) forms use v16f32_info/VR512 with EVEX_V512 and CD8VQ; the
// scalar (SS) forms use f32x_info/VR128X with VEX_LIG and CD8VF. Every form
// takes its memory source as f128mem.
11955let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
11956    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
// Packed form, opcode 0x9A.
11957defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
11958                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11959                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
11960                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11961                    Sched<[SchedWriteFMA.ZMM.Folded]>;
11962
// Packed negated form, opcode 0xAA.
11963defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
11964                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11965                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
11966                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11967                     Sched<[SchedWriteFMA.ZMM.Folded]>;
11968
// Scalar form, opcode 0x9B.
11969defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
11970                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
11971                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
11972                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
11973                    Sched<[SchedWriteFMA.Scl.Folded]>;
11974
// Scalar negated form, opcode 0xAB.
11975defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
11976                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
11977                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
11978                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
11979                     Sched<[SchedWriteFMA.Scl.Folded]>;
11980}
11981
11982//===----------------------------------------------------------------------===//
11983// AVX5124VNNIW
11984//===----------------------------------------------------------------------===//
11985
// AVX5124VNNIW memory-form definitions. Both are created through
// AVX512_maskable_3src_in_asm with an empty pattern list ([]), so no
// selection pattern is attached here. The shared `let` marks them as
// side-effect free loads in the SSEPackedInt domain with $src1 tied to $dst.
// Both operate on v16i32_info/VR512, take the memory source as f128mem, and
// reuse the SchedWriteFMA.ZMM.Folded scheduling class.
11986let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
11987    Constraints = "$src1 = $dst" in {
// Opcode 0x52.
11988defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
11989                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11990                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
11991                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11992                    Sched<[SchedWriteFMA.ZMM.Folded]>;
11993
// Opcode 0x53 (mnemonic carries an extra trailing "s").
11994defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
11995                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11996                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
11997                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11998                     Sched<[SchedWriteFMA.ZMM.Folded]>;
11999}
12000
// Pattern-less pseudo instructions that move a VK16PAIR operand to and from
// memory (anymem). The store is marked mayStore and scheduled as
// WriteFStoreX; the load is marked mayLoad and scheduled as WriteFLoadX.
// NOTE(review): being PseudoI with no patterns, these are presumably created
// and expanded by C++ code elsewhere - not visible in this file section.
12001let hasSideEffects = 0 in {
12002  let mayStore = 1, SchedRW = [WriteFStoreX] in
12003  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12004  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12005  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12006}
12007
12008//===----------------------------------------------------------------------===//
12009// VP2INTERSECT
12010//===----------------------------------------------------------------------===//
12011
// Defines the three operand forms of vp2intersect for one vector type `_`:
//   rr  - register, register
//   rm  - register, full-width memory load
//   rmb - register, broadcast scalar memory load (EVEX_B)
// All forms use opcode 0x68 in the T8XD map, write a mask register pair
// (_.KRPC) and select X86vp2intersect. The mnemonic is "vp2intersect"
// followed by _.Suffix. The memory forms set EVEX_CD8 from _.EltSize and use
// the folded-load scheduling classes of `sched`.
12012multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12013  def rr : I<0x68, MRMSrcReg,
12014                  (outs _.KRPC:$dst),
12015                  (ins _.RC:$src1, _.RC:$src2),
12016                  !strconcat("vp2intersect", _.Suffix,
12017                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12018                  [(set _.KRPC:$dst, (X86vp2intersect
12019                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12020                  EVEX_4V, T8XD, Sched<[sched]>;
12021
       // Full-vector load form: the loaded value is bitconverted to _.VT.
12022  def rm : I<0x68, MRMSrcMem,
12023                  (outs _.KRPC:$dst),
12024                  (ins  _.RC:$src1, _.MemOp:$src2),
12025                  !strconcat("vp2intersect", _.Suffix,
12026                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12027                  [(set _.KRPC:$dst, (X86vp2intersect
12028                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12029                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12030                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12031
       // Broadcast form: ${src2} is printed with _.BroadcastStr appended in
       // both assembly syntax variants.
12032  def rmb : I<0x68, MRMSrcMem,
12033                  (outs _.KRPC:$dst),
12034                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12035                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12036                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12037                  [(set _.KRPC:$dst, (X86vp2intersect
12038                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12039                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12040                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12041}
12042
// Instantiates avx512_vp2intersect_modes for each width of the type bundle
// `_`. The 512-bit form requires HasAVX512 + HasVP2INTERSECT; the 256- and
// 128-bit forms additionally require HasVLX. Each instantiation is tagged
// with the EVEX_V* class matching its width.
12043multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12044  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12045    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12046
12047  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12048    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12049    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12050  }
12051}
12052
// Doubleword (i32) and quadword (i64) vp2intersect families, both scheduled
// with SchedWriteVecALU. The quadword family additionally carries VEX_W.
12053defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12054defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12055
// Instantiates avx512_binop_rm2 at 512/256/128 bits for a binary operation
// whose source and destination vector types differ.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling info (.ZMM/.YMM/.XMM forwarded).
//   _SrcVTInfo     - source type bundle; passed as both the first and the
//                    third type-info argument of avx512_binop_rm2.
//   _DstVTInfo     - destination type bundle (second type-info argument).
//   OpNode         - SDNode forwarded to avx512_binop_rm2.
//   prd            - feature predicate; HasVLX is added for Z256/Z128.
//   IsCommutable   - forwarded unchanged; defaults to 0.
// Every instantiation is tagged EVEX_CD8<32, CD8VF>. The defm names are
// spelled NAME#Z, NAME#Z256 and NAME#Z128.
12056multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12057                             X86SchedWriteWidths sched,
12058                             AVX512VLVectorVTInfo _SrcVTInfo,
12059                             AVX512VLVectorVTInfo _DstVTInfo,
12060                             SDNode OpNode, Predicate prd,
12061                             bit IsCommutable = 0> {
12062  let Predicates = [prd] in
12063    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12064                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12065                                   _SrcVTInfo.info512, IsCommutable>,
12066                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12067  let Predicates = [HasVLX, prd] in {
12068    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12069                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12070                                      _SrcVTInfo.info256, IsCommutable>,
12071                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12072    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12073                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12074                                      _SrcVTInfo.info128, IsCommutable>,
12075                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12076  }
12077}
12078
// vcvtne2ps2bf16: opcode 0x72 in the T8XD map, guarded by HasBF16, selecting
// X86cvtne2ps2bf16. Sources use the f32 type bundle and the destination uses
// the i16 type bundle; the operation is declared non-commutable (0).
// SchedWriteCvtPD2PS is a stand-in scheduling class (see the inline FIXME).
12079let ExeDomain = SSEPackedSingle in
12080defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12081                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12082                                        avx512vl_f32_info, avx512vl_i16_info,
12083                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12084
12085// Truncate Float to BFloat16
// Instantiates avx512_vcvt_fp for the f32 -> bf16 conversion at 512/256/128
// bits and adds "x"/"y"-suffixed assembler aliases for the two narrower
// forms. All instantiations are in the SSEPackedSingle domain, clear the
// implicit register uses (Uses = []) and set mayRaiseFPException = 0.
//   Z    - v16f32 source, v16i16x destination; requires HasBF16.
//   Z256 - v8f32x source, v8i16x destination, "{1to8}" broadcast string and
//          "{y}" alias suffix; requires HasBF16 + HasVLX.
//   Z128 - v4f32x source, v8i16x destination, "{1to4}" broadcast string,
//          "{x}" alias suffix, f128mem memory operand and VK4WM write mask;
//          requires HasBF16 + HasVLX. null_frag is passed in place of both
//          op nodes for this width.
12086multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12087                             X86SchedWriteWidths sched> {
12088  let ExeDomain = SSEPackedSingle in {
12089  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12090    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12091                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12092  }
12093  let Predicates = [HasBF16, HasVLX] in {
12094    let Uses = []<Register>, mayRaiseFPException = 0 in {
12095    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12096                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12097                               VK4WM>, EVEX_V128;
12098    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12099                               X86cvtneps2bf16, X86cvtneps2bf16,
12100                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12101    }
12102  } // Predicates = [HasBF16, HasVLX]
12103  } // ExeDomain = SSEPackedSingle
12104
       // Aliases spelling the mnemonic with an explicit "x" (128-bit source)
       // or "y" (256-bit source) suffix. Both widths write a VR128X
       // destination. The memory-operand aliases are restricted to the
       // "intel" assembler variant; the register aliases are not.
12105  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12106                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12107                  VR128X:$src), 0>;
12108  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12109                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12110                  f128mem:$src), 0, "intel">;
12111  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12112                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12113                  VR256X:$src), 0>;
12114  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12115                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12116                  f256mem:$src), 0, "intel">;
12117}
12118
// vcvtneps2bf16: opcode 0x72 in the T8XS map with EVEX_CD8<32, CD8VF>,
// scheduled with SchedWriteCvtPD2PS.
12119defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12120                                       SchedWriteCvtPD2PS>, T8XS,
12121                                       EVEX_CD8<32, CD8VF>;
12122
// Explicit selection patterns for the 128-bit VCVTNEPS2BF16 instructions,
// guarded by HasBF16 + HasVLX. They come in three groups of three, one group
// per source kind (register, full v4f32 load, 32-bit broadcast load). Each
// group has:
//   - an unmasked pattern on X86cvtneps2bf16,
//   - a merge-masked pattern on X86mcvtneps2bf16 with a passthrough $src0,
//     selecting the "k" instruction variant,
//   - a zero-masked pattern on X86mcvtneps2bf16 with an all-zeros
//     passthrough, selecting the "kz" instruction variant.
// The mask operand is VK4WM throughout.
12123let Predicates = [HasBF16, HasVLX] in {
12124  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12125  // patterns have been disabled with null_frag.
       // Register source.
12126  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12127            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12128  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12129                              VK4WM:$mask),
12130            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12131  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12132                              VK4WM:$mask),
12133            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12134
       // Full-vector load source.
12135  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12136            (VCVTNEPS2BF16Z128rm addr:$src)>;
12137  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12138                              VK4WM:$mask),
12139            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12140  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12141                              VK4WM:$mask),
12142            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12143
       // Broadcast-load source.
12144  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12145                                     (X86VBroadcastld32 addr:$src)))),
12146            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12147  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12148                              (v8i16 VR128X:$src0), VK4WM:$mask),
12149            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12150  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12151                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12152            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12153}
12154
// Three-source maskable forms for one vector width; $src1 is the accumulator
// and is tied to $dst by the enclosing Constraints.
//   _     - type info of the accumulator/result (register class and VT).
//   src_v - type info of the two multiplicand sources ($src2/$src3).
// Forms defined:
//   r  - $src3 in a register
//   m  - $src3 loaded with src_v.LdFrag
//   mb - $src3 broadcast-loaded with src_v.BroadcastLdFrag (EVEX_B)
12155let Constraints = "$src1 = $dst" in {
12156multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12157                              X86FoldableSchedWrite sched,
12158                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12159  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12160                           (ins src_v.RC:$src2, src_v.RC:$src3),
12161                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12162                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12163                           EVEX_4V, Sched<[sched]>;
12164
12165  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12166                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12167                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12168                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12169                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12170                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12171
       // The printed broadcast decoration comes from `_` (_.BroadcastStr),
       // while the memory operand and load fragment come from src_v.
12172  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12173                  (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12174                  OpcodeStr,
12175                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12176                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12177                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12178                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12179                  EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12180
12181}
12182} // Constraints = "$src1 = $dst"
12183
// Instantiates avx512_dpbf16ps_rm at 512/256/128 bits.
//   _     - result/accumulator type bundle.
//   src_v - source type bundle.
//   prd   - feature predicate; HasVLX is added for Z256/Z128.
// Each instantiation is tagged with the EVEX_V* class matching its width.
12184multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12185                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12186                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12187  let Predicates = [prd] in {
12188    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12189                                   src_v.info512>, EVEX_V512;
12190  }
12191  let Predicates = [HasVLX, prd] in {
12192    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12193                                   src_v.info256>, EVEX_V256;
12194    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12195                                   src_v.info128>, EVEX_V128;
12196  }
12197}
12198
// vdpbf16ps: opcode 0x52 in the T8XS map with EVEX_CD8<32, CD8VF>, guarded by
// HasBF16 and selecting X86dpbf16ps. The result uses the f32 type bundle and
// the sources use the i32 type bundle; scheduled with SchedWriteFMA.
12199let ExeDomain = SSEPackedSingle in
12200defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12201                                       avx512vl_f32_info, avx512vl_i32_info,
12202                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12203