xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 25ecdc7d52770caf1c9b44b5ec11f468f6b636f3)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// Group template arguments that can be derived from the vector type (EltNum x
16// EltVT).  These are things like the register class for the writemask, etc.
17// The idea is to pass one of these as the template argument rather than the
18// individual arguments.
19// The template is also used for scalar types, in this case numelts is 1.
// Template arguments:
//   numelts - element count; 1 selects the scalar handling in VTName below.
//   eltvt   - element value type.
//   rc      - register class recorded in RC.
//   suffix  - mnemonic suffix, empty by default.
// Several fields are resolved by name with !cast, so a record with the
// concatenated name has to exist for every instantiation of this class.
20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                      string suffix = ""> {
22  RegisterClass RC = rc;
23  ValueType EltVT = eltvt;
24  int NumElts = numelts;
25
26  // Corresponding mask register class.
27  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
28
29  // Corresponding mask register pair class.
  // Left unset (?) when NumElts is greater than 16.
30  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
32
33  // Corresponding write-mask register class.
34  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
35
36  // The mask VT.
37  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
38
39  // Suffix used in the instruction mnemonic.
40  string Suffix = suffix;
41
42  // VTName is a string name for vector VT. For vector types it will be
43  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44  // It is a little bit complex for scalar types, where NumElts = 1.
45  // In this case we build v4f32 or v2f64
  // The scalar case keys on EltVT.Size only (32 -> 4 elements, 64 -> 2), so
  // integer scalars follow the same rule; any other size keeps NumElts.
46  string VTName = "v" # !if (!eq (NumElts, 1),
47                        !if (!eq (EltVT.Size, 32), 4,
48                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
49
50  // The vector VT.
51  ValueType VT = !cast<ValueType>(VTName);
52
53  string EltTypeName = !cast<string>(EltVT);
54  // Size of the element type in bits, e.g. 32 for v16i32.
  // Obtained textually by removing every "i" and "f" from EltTypeName.
55  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
56  int EltSize = EltVT.Size;
57
58  // "i" for integer types and "f" for floating-point types
  // Obtained by removing EltSizeName from EltTypeName.
59  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
60
61  // Size of RC in bits, e.g. 512 for VR512.
  // Taken from VT.Size, i.e. from the vector type named by VTName.
62  int Size = VT.Size;
63
64  // The corresponding memory operand, e.g. i512mem for VR512.
65  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
66  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
67  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Unset (?) for element types other than f32 and f64.
68  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
69                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
70
71  // Load patterns
72  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
73
74  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
75
76  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
77  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
78
  // Scalar-intrinsic load fragments; unset (?) unless the element type is
  // f32 or f64.
79  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
80                                           !cast<PatFrags>("sse_load_f32"),
81                               !if (!eq (EltTypeName, "f64"),
82                                     !cast<PatFrags>("sse_load_f64"),
83                               ?));
84
85  // The string to specify embedded broadcast in assembly.
86  string BroadcastStr = "{1to" # NumElts # "}";
87
88  // 8-bit compressed displacement tuple/subvector format.  This is only
89  // defined for NumElts <= 8.
  // NOTE(review): the guard below (!srl(NumElts, 4) == 0) is true for every
  // NumElts below 16, not just <= 8; it relies on no instantiation using an
  // element count between 9 and 15 - confirm against the CD8VT* records.
90  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
92
  // Sub-register index for 128- and 256-bit types; unset (?) for other sizes.
93  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94                          !if (!eq (Size, 256), sub_ymm, ?));
95
  // Execution domain: every element type other than f32/f64 maps to
  // SSEPackedInt.
96  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
98                     SSEPackedInt));
99
  // FR32X for f32; every other element type (integers included) gets FR64X.
100  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
101
  // All-zeros vector of type VT, used as the false operand of zero-masking
  // selects.
102  dag ImmAllZerosV = (VT immAllZerosV);
103
  // Size-based suffix: "Z128", "Z256", or "Z" for any other size.
104  string ZSuffix = !if (!eq (Size, 128), "Z128",
105                   !if (!eq (Size, 256), "Z256", "Z"));
106}
107
// Descriptors whose register class is VR512.
108def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
109def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
110def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
111def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
112def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
113def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
114
115// "x" in v32i8x_info means RC = VR256X
116def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
117def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
118def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
119def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
120def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
121def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
122
// The same "x" naming is used for the descriptors below, whose register class
// is VR128X.
123def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
124def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
125def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
126def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
127def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
128def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
129
130// We map scalar types to the smallest (128-bit) vector type with the
131// appropriate element type, so that the same masking logic can be reused.
// All four pass NumElts = 1. The integer entries use general-purpose register
// classes (GR32/GR64) while the FP entries use VR128X.
132def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
133def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
134def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
135def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
136
// Bundles three X86VectorVTInfo descriptors so that one template argument can
// carry a whole family.  The arguments are stored unchanged in info512,
// info256 and info128; the class itself does not check their sizes.
137class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
138                           X86VectorVTInfo i128> {
139  X86VectorVTInfo info512 = i512;
140  X86VectorVTInfo info256 = i256;
141  X86VectorVTInfo info128 = i128;
142}
143
// One bundle per element type, each listing the VR512, VR256X and VR128X
// descriptors (in that order) for that element type.
144def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
145                                             v16i8x_info>;
146def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
147                                             v8i16x_info>;
148def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
149                                             v4i32x_info>;
150def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
151                                             v2i64x_info>;
152def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
153                                             v4f32x_info>;
154def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
155                                             v2f64x_info>;
156
// Descriptor for a mask (vNi1) type: the mask register class, the matching
// write-mask register class, and the mask value type.  Unlike
// X86VectorVTInfo nothing is derived here; all three are passed explicitly.
157class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
158                       ValueType _vt> {
159  RegisterClass KRC = _krc;
160  RegisterClass KRCWM = _krcwm;
161  ValueType KVT = _vt;
162}
163
// Mask descriptors for 1, 2, 4, 8, 16, 32 and 64 mask bits; each pairs VKn
// with VKnWM and the vni1 value type.
164def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
165def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
166def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
167def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
168def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
169def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
170def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
171
172// Used for matching masked operations. Ensures the operation part only has a
173// single use.
// Matches (vselect mask, src1, src2) only when the C++ predicate
// isProfitableToFormMaskedOp(N) returns true for the matched node.  That
// helper is defined outside this file.
174def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
175                           (vselect node:$mask, node:$src1, node:$src2), [{
176  return isProfitableToFormMaskedOp(N);
177}]>;
178
// Counterpart of vselect_mask for the X86selects node: matches
// (X86selects mask, src1, src2) under the same
// isProfitableToFormMaskedOp(N) predicate.
179def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
180                              (X86selects node:$mask, node:$src1, node:$src2), [{
181  return isProfitableToFormMaskedOp(N);
182}]>;
183
184// This multiclass generates the masking variants from the non-masking
185// variant.  It only provides the assembly pieces for the masking variants.
186// It assumes custom ISel patterns for masking which can be provided as
187// template arguments.
// Emits three instruction records per defm:
//   NAME    - unmasked form, operands Ins, patterns Pattern.
//   NAME#k  - masked form (EVEX_K), operands MaskingIns, patterns
//             MaskingPattern, Constraints = MaskingConstraint.
//   NAME#kz - zero-masked form (EVEX_KZ), operands ZeroMaskingIns, patterns
//             ZeroMaskingPattern.
// Each asm string has the shape "OpcodeStr\t{AttSrcAsm, $dst|$dst, IntelSrcAsm}"
// with " {${mask}}" (and " {z}" for kz) appended to $dst in the masked forms.
// isCommutable is set independently per record from IsCommutable,
// IsKCommutable and IsKZCommutable; the last defaults to IsCommutable.
188multiclass AVX512_maskable_custom<bits<8> O, Format F,
189                                  dag Outs,
190                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
191                                  string OpcodeStr,
192                                  string AttSrcAsm, string IntelSrcAsm,
193                                  list<dag> Pattern,
194                                  list<dag> MaskingPattern,
195                                  list<dag> ZeroMaskingPattern,
196                                  string MaskingConstraint = "",
197                                  bit IsCommutable = 0,
198                                  bit IsKCommutable = 0,
199                                  bit IsKZCommutable = IsCommutable> {
200  let isCommutable = IsCommutable in
201    def NAME: AVX512<O, F, Outs, Ins,
202                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
203                                     "$dst, "#IntelSrcAsm#"}",
204                       Pattern>;
205
206  // Prefer over VMOV*rrk Pat<>
207  let isCommutable = IsKCommutable in
208    def NAME#k: AVX512<O, F, Outs, MaskingIns,
209                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
210                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
211                       MaskingPattern>,
212              EVEX_K {
213      // In case of the 3src subclass this is overridden with a let.
214      string Constraints = MaskingConstraint;
215    }
216
217  // Zero mask does not add any restrictions to commute operands transformation.
218  // So, it is Ok to use IsCommutable instead of IsKCommutable.
219  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
220    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
221                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
222                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
223                       ZeroMaskingPattern>,
224              EVEX_KZ;
225}
226
227
228// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists for AVX512_maskable_custom from dags:
//   unmasked    : (set _.RC:$dst, RHS)
//   masked      : (set _.RC:$dst, MaskingRHS)  - supplied whole by the caller
//   zero-masked : (set _.RC:$dst, (Select _.KRCWM:$mask, RHS, all-zeros))
// Select defaults to vselect_mask.  The remaining arguments are forwarded
// unchanged.
229multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
230                                  dag Outs,
231                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
232                                  string OpcodeStr,
233                                  string AttSrcAsm, string IntelSrcAsm,
234                                  dag RHS, dag MaskingRHS,
235                                  SDPatternOperator Select = vselect_mask,
236                                  string MaskingConstraint = "",
237                                  bit IsCommutable = 0,
238                                  bit IsKCommutable = 0,
239                                  bit IsKZCommutable = IsCommutable> :
240  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
241                         AttSrcAsm, IntelSrcAsm,
242                         [(set _.RC:$dst, RHS)],
243                         [(set _.RC:$dst, MaskingRHS)],
244                         [(set _.RC:$dst,
245                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
246                         MaskingConstraint, IsCommutable,
247                         IsKCommutable, IsKZCommutable>;
248
249// This multiclass generates the unconditional/non-masking, the masking and
250// the zero-masking variant of the vector instruction.  In the masking case, the
251// preserved vector elements come from a new dummy input operand tied to $dst.
252// This version uses a separate dag for non-masking and masking.
// RHS is used only for the unmasked pattern; MaskRHS is used for both masked
// patterns, wrapped in vselect_mask with either $src0 (masked) or the
// all-zeros vector (zero-masked) as the fall-back value.
// The masked operand list is (_.RC:$src0, _.KRCWM:$mask) followed by Ins, and
// the constraint "$src0 = $dst" ties $src0 to the destination.
253multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
254                           dag Outs, dag Ins, string OpcodeStr,
255                           string AttSrcAsm, string IntelSrcAsm,
256                           dag RHS, dag MaskRHS,
257                           bit IsCommutable = 0, bit IsKCommutable = 0,
258                           bit IsKZCommutable = IsCommutable> :
259   AVX512_maskable_custom<O, F, Outs, Ins,
260                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
261                          !con((ins _.KRCWM:$mask), Ins),
262                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
263                          [(set _.RC:$dst, RHS)],
264                          [(set _.RC:$dst,
265                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
266                          [(set _.RC:$dst,
267                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
268                          "$src0 = $dst", IsCommutable, IsKCommutable,
269                          IsKZCommutable>;
270
271// This multiclass generates the unconditional/non-masking, the masking and
272// the zero-masking variant of the vector instruction.  In the masking case, the
273// preserved vector elements come from a new dummy input operand tied to $dst.
// A single RHS dag serves all three forms.  The masked pattern is
// (Select _.KRCWM:$mask, RHS, _.RC:$src0), where $src0 is an extra input
// prepended to Ins and tied to $dst through the "$src0 = $dst" constraint.
// The zero-masked operand list prepends only the mask.  Select defaults to
// vselect_mask and is also passed down for the zero-masked pattern.
274multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
275                           dag Outs, dag Ins, string OpcodeStr,
276                           string AttSrcAsm, string IntelSrcAsm,
277                           dag RHS,
278                           bit IsCommutable = 0, bit IsKCommutable = 0,
279                           bit IsKZCommutable = IsCommutable,
280                           SDPatternOperator Select = vselect_mask> :
281   AVX512_maskable_common<O, F, _, Outs, Ins,
282                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
283                          !con((ins _.KRCWM:$mask), Ins),
284                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
285                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
286                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
287                          IsKZCommutable>;
288
289// This multiclass generates the unconditional/non-masking, the masking and
290// the zero-masking variant of the scalar instruction.
// Thin wrapper over AVX512_maskable that fixes all three commutable flags to
// 0 and selects with X86selects_mask instead of vselect_mask.
291multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
292                           dag Outs, dag Ins, string OpcodeStr,
293                           string AttSrcAsm, string IntelSrcAsm,
294                           dag RHS> :
295   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
296                   RHS, 0, 0, 0, X86selects_mask>;
297
298// Similar to AVX512_maskable but in this case one of the source operands
299// ($src1) is already tied to $dst so we just use that for the preserved
300// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
301// $src1.
// _.RC:$src1 is prepended to NonTiedIns for every form; the masked and
// zero-masked operand lists are identical, (_.RC:$src1, _.KRCWM:$mask)
// followed by NonTiedIns.
// The masked pattern falls back to $src1: (Select mask, RHS, _.RC:$src1).
// An empty MaskingConstraint is passed here, so the tie between $src1 and
// $dst is not expressed by this multiclass itself.
// MaskOnly = 1 replaces the unmasked RHS with (null_frag); the masked pattern
// still uses RHS.  The zero-masked pattern is then built from (null_frag) too,
// since AVX512_maskable_common reuses the unmasked RHS for it.
// IsKZCommutable is not forwarded and therefore takes its default in
// AVX512_maskable_common (IsCommutable).
302multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
303                                dag Outs, dag NonTiedIns, string OpcodeStr,
304                                string AttSrcAsm, string IntelSrcAsm,
305                                dag RHS,
306                                bit IsCommutable = 0,
307                                bit IsKCommutable = 0,
308                                SDPatternOperator Select = vselect_mask,
309                                bit MaskOnly = 0> :
310   AVX512_maskable_common<O, F, _, Outs,
311                          !con((ins _.RC:$src1), NonTiedIns),
312                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
313                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
314                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
315                          !if(MaskOnly, (null_frag), RHS),
316                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
317                          Select, "", IsCommutable, IsKCommutable>;
318
319// Similar to AVX512_maskable_3src but in this case the input VT for the tied
320// operand differs from the output VT. This requires a bitconvert on
321// the preserved vector going into the vselect.
322// NOTE: The unmasked pattern is disabled.
// OutVT describes the result; InVT describes the tied $src1 operand and
// supplies the mask operand class (InVT.KRCWM).
// The unmasked RHS passed down is (null_frag).  The masked pattern selects
// between RHS and (bitconvert InVT.RC:$src1) with vselect_mask.
// NOTE(review): the zero-masked pattern inherited from
// AVX512_maskable_common is built from the same (null_frag) RHS and refers to
// OutVT.KRCWM:$mask, while the operand list declares InVT.KRCWM:$mask -
// confirm both infos always share the same mask class.
// IsKCommutable is not forwarded and so defaults to 0.
323multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
324                                     X86VectorVTInfo InVT,
325                                     dag Outs, dag NonTiedIns, string OpcodeStr,
326                                     string AttSrcAsm, string IntelSrcAsm,
327                                     dag RHS, bit IsCommutable = 0> :
328   AVX512_maskable_common<O, F, OutVT, Outs,
329                          !con((ins InVT.RC:$src1), NonTiedIns),
330                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
331                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
332                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
333                          (vselect_mask InVT.KRCWM:$mask, RHS,
334                           (bitconvert InVT.RC:$src1)),
335                           vselect_mask, "", IsCommutable>;
336
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and passes X86selects_mask as the Select operator.
337multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
338                                     dag Outs, dag NonTiedIns, string OpcodeStr,
339                                     string AttSrcAsm, string IntelSrcAsm,
340                                     dag RHS,
341                                     bit IsCommutable = 0,
342                                     bit IsKCommutable = 0,
343                                     bit MaskOnly = 0> :
344   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
345                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
346                        X86selects_mask, MaskOnly>;
347
// Defines the unmasked, masked and zero-masked records but gives a selection
// pattern (Pattern) only to the unmasked one; both masked pattern lists are
// empty.  The operand lists and the "$src0 = $dst" constraint match those used
// by AVX512_maskable.  The commutable flags are left at their defaults (0).
348multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
349                                  dag Outs, dag Ins,
350                                  string OpcodeStr,
351                                  string AttSrcAsm, string IntelSrcAsm,
352                                  list<dag> Pattern> :
353   AVX512_maskable_custom<O, F, Outs, Ins,
354                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
355                          !con((ins _.KRCWM:$mask), Ins),
356                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
357                          "$src0 = $dst">;
358
// 3-source counterpart of AVX512_maskable_in_asm: _.RC:$src1 is prepended to
// NonTiedIns in every operand list, only the unmasked record receives
// Pattern, and an empty constraint string is passed.
359multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
360                                       dag Outs, dag NonTiedIns,
361                                       string OpcodeStr,
362                                       string AttSrcAsm, string IntelSrcAsm,
363                                       list<dag> Pattern> :
364   AVX512_maskable_custom<O, F, Outs,
365                          !con((ins _.RC:$src1), NonTiedIns),
366                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
367                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
368                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
369                          "">;
370
371// Instruction with mask that puts result in mask register,
372// like "compare" and "vptest"
// Emits two records only: NAME (unmasked) and NAME#k (masked, EVEX_K).  There
// is no zero-masked variant and no Constraints field.  Both records share the
// same isCommutable value, taken from IsCommutable.
// Asm strings follow the same "{AT&T|Intel}" layout as
// AVX512_maskable_custom.
373multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
374                                  dag Outs,
375                                  dag Ins, dag MaskingIns,
376                                  string OpcodeStr,
377                                  string AttSrcAsm, string IntelSrcAsm,
378                                  list<dag> Pattern,
379                                  list<dag> MaskingPattern,
380                                  bit IsCommutable = 0> {
381    let isCommutable = IsCommutable in {
382    def NAME: AVX512<O, F, Outs, Ins,
383                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
384                                     "$dst, "#IntelSrcAsm#"}",
385                       Pattern>;
386
387    def NAME#k: AVX512<O, F, Outs, MaskingIns,
388                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
389                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
390                       MaskingPattern>, EVEX_K;
391    }
392}
393
// Wraps RHS and MaskingRHS into (set _.KRC:$dst, ...) patterns, so the result
// lands in the mask register class of the given type info, and forwards
// everything else to AVX512_maskable_custom_cmp.
394multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
395                                  dag Outs,
396                                  dag Ins, dag MaskingIns,
397                                  string OpcodeStr,
398                                  string AttSrcAsm, string IntelSrcAsm,
399                                  dag RHS, dag MaskingRHS,
400                                  bit IsCommutable = 0> :
401  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
402                         AttSrcAsm, IntelSrcAsm,
403                         [(set _.KRC:$dst, RHS)],
404                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
405
// The masked operand list is the mask followed by Ins.  The masked pattern is
// (and _.KRCWM:$mask, RHS_su): the caller passes a separate RHS_su dag for
// the masked form, while RHS is used for the unmasked form.
// NOTE(review): the "_su" suffix presumably means a single-use variant of
// RHS - confirm at the call sites.
406multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
407                           dag Outs, dag Ins, string OpcodeStr,
408                           string AttSrcAsm, string IntelSrcAsm,
409                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
410   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
411                          !con((ins _.KRCWM:$mask), Ins),
412                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
413                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
414
415// Used by conversion instructions.
// The caller supplies all three operand lists and all three right-hand sides;
// this multiclass only wraps each RHS in (set _.RC:$dst, ...) and fixes the
// masked constraint to "$src0 = $dst".  The commutable flags stay at their
// defaults (0).
416multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
417                                  dag Outs,
418                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
419                                  string OpcodeStr,
420                                  string AttSrcAsm, string IntelSrcAsm,
421                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
422  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
423                         AttSrcAsm, IntelSrcAsm,
424                         [(set _.RC:$dst, RHS)],
425                         [(set _.RC:$dst, MaskingRHS)],
426                         [(set _.RC:$dst, ZeroMaskingRHS)],
427                         "$src0 = $dst">;
428
// 3-source maskable form with separate dags: RHS for the unmasked pattern and
// MaskingRHS for both masked patterns.  The masked pattern falls back to
// _.RC:$src1 and the zero-masked pattern to the all-zeros vector, both through
// vselect_mask.  _.RC:$src1 is prepended to NonTiedIns in every operand list
// and an empty constraint string is passed.
// IsCommutable and IsKCommutable have no defaults here and must be given;
// IsKZCommutable is not forwarded and so follows IsCommutable.
429multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
430                               dag Outs, dag NonTiedIns, string OpcodeStr,
431                               string AttSrcAsm, string IntelSrcAsm,
432                               dag RHS, dag MaskingRHS, bit IsCommutable,
433                               bit IsKCommutable> :
434   AVX512_maskable_custom<O, F, Outs,
435                          !con((ins _.RC:$src1), NonTiedIns),
436                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
437                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
438                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
439                          [(set _.RC:$dst, RHS)],
440                          [(set _.RC:$dst,
441                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
442                          [(set _.RC:$dst,
443                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
444                          "", IsCommutable, IsKCommutable>;
445
446// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
447// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
448// swizzled by ExecutionDomainFix to pxor.
449// We set canFoldAsLoad because this can be converted to a constant-pool
450// load of an all-zeros value if folding it would be beneficial.
// Operand-less pseudos producing a VR512 all-zeros (SET0) or all-ones
// (SETALLONES) value.  Both carry their selection pattern on v16i32 only.
451let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
452    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
453def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
454               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
455def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
456               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
457}
458
// Map the all-zeros vector of the other five 512-bit types onto the same
// SET0 pseudo.  No equivalent patterns for all-ones are defined here.
459let Predicates = [HasAVX512] in {
460def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
461def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
462def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
463def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
464def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
465}
466
467// Alias instructions that allow VPTERNLOG to be used with a mask to create
468// a mix of all ones and all zeros elements. This is done this way to force
469// the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask operand and matches a vselect between
// the all-ones and all-zeros vectors: VK16WM with v16i32 for the _32 variant,
// VK8WM with v8i64 for the _64 variant.
470let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
471def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
472                                (ins VK16WM:$mask), "",
473                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
474                                                      (v16i32 immAllOnesV),
475                                                      (v16i32 immAllZerosV)))]>;
476def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
477                                (ins VK8WM:$mask), "",
478                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
479                                           (v8i64 immAllOnesV),
480                                           (v8i64 immAllZerosV)))]>;
481}
482
// All-zeros pseudos for the VR128X and VR256X register classes, with the same
// flags as the 512-bit SET0 pseudo.  The patterns are on v4i32 and v8i32.
// NOTE(review): these are guarded by HasAVX512 only, not HasVLX - looks
// intentional, but confirm.
483let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
484    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
485def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
486               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
487def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
488               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
489}
490
// Route the all-zeros vector of the remaining 128-bit and 256-bit types to
// the matching pseudo.
491let Predicates = [HasAVX512] in {
492def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
493def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
494def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
495def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
496def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
497def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
498def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
499def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
500def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
501def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
502}
503
504// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
505// This is expanded by ExpandPostRAPseudos.
// Operand-less pseudos that materialize floating-point zero: fp32imm0 into
// FR32X, fp64imm0 into FR64X and fp128imm0 into VR128X.
506let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
507    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
508  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
509                          [(set FR32X:$dst, fp32imm0)]>;
510  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
511                          [(set FR64X:$dst, fp64imm0)]>;
512  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
513                            [(set VR128X:$dst, fp128imm0)]>;
514}
515
516//===----------------------------------------------------------------------===//
517// AVX-512 - VECTOR INSERT
518//
519
520// Supports two different pattern operators for mask and unmasked ops. Allows
521// null_frag to be passed for one.
// Defines the register (rr) and memory (rm) forms of a subvector insert of
// type From into type To.
//   Opcode           - opcode passed to AVX512_maskable_split.
//   vinsert_insert   - operator used in the unmasked pattern.
//   vinsert_for_mask - operator used in the masked/zero-masked patterns.
//   sched            - scheduling class; rm uses sched.Folded and
//                      sched.ReadAfterFold.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts, e.g.
// "vinsertf32x4" for a 4 x f32 source.
522multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
523                                  X86VectorVTInfo To,
524                                  SDPatternOperator vinsert_insert,
525                                  SDPatternOperator vinsert_for_mask,
526                                  X86FoldableSchedWrite sched> {
527  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: $src2 comes from From.RC.
528    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
529                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
530                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
531                   "$src3, $src2, $src1", "$src1, $src2, $src3",
532                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
533                                         (From.VT From.RC:$src2),
534                                         (iPTR imm)),
535                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
536                                           (From.VT From.RC:$src2),
537                                           (iPTR imm))>,
538                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: $src2 is loaded through From.LdFrag; the compressed
    // displacement parameters come from the From type.
539    let mayLoad = 1 in
540    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
541                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
542                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
543                   "$src3, $src2, $src1", "$src1, $src2, $src3",
544                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
545                               (From.VT (From.LdFrag addr:$src2)),
546                               (iPTR imm)),
547                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
548                               (From.VT (From.LdFrag addr:$src2)),
549                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
550                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
551                   Sched<[sched.Folded, sched.ReadAfterFold]>;
552  }
553}
554
555// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper: forwards to vinsert_for_size_split with
// vinsert_insert supplied for both the unmasked and the masked operator.
556multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
557                            X86VectorVTInfo To,
558                            SDPatternOperator vinsert_insert,
559                            X86FoldableSchedWrite sched> :
560  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
561
// Emits two anonymous selection patterns, guarded by predicate list p, that
// map vinsert_insert on the From/To types onto existing instructions looked
// up by name: InstrStr#"rr" for a register source and InstrStr#"rm" for a
// source loaded through From.LdFrag.  The immediate operand is produced by
// applying the INSERT_get_vinsert_imm transform to the matched $ins node.
562multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
563                       X86VectorVTInfo To, PatFrag vinsert_insert,
564                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
565  let Predicates = p in {
    // Register source.
566    def : Pat<(vinsert_insert:$ins
567                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
568              (To.VT (!cast<Instruction>(InstrStr#"rr")
569                     To.RC:$src1, From.RC:$src2,
570                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
571
    // Memory source.
572    def : Pat<(vinsert_insert:$ins
573                  (To.VT To.RC:$src1),
574                  (From.VT (From.LdFrag addr:$src2)),
575                  (iPTR imm)),
576              (To.VT (!cast<Instruction>(InstrStr#"rm")
577                  To.RC:$src1, addr:$src2,
578                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
579  }
580}
581
// Instantiates the whole insert family for one 32-bit and one 64-bit element
// type.  Opcode128 is used for 128-bit sources and Opcode256 for 256-bit
// sources.  The type infos are built inline as anonymous X86VectorVTInfo
// records with no mnemonic suffix.
// The 32x4 and 64x4 variants use one operator for both unmasked and masked
// patterns; the 64x2 and 32x8 variants pass null_frag for the unmasked
// operator, so they carry masked patterns only.
582multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
583                            ValueType EltVT64, int Opcode256,
584                            X86FoldableSchedWrite sched> {
585
  // 128-bit (4 x 32) into 256-bit; requires VLX.
586  let Predicates = [HasVLX] in
587    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
588                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
589                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
590                                 vinsert128_insert, sched>, EVEX_V256;
591
  // 128-bit (4 x 32) into 512-bit; no Predicates override is applied here.
592  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
593                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
594                                 X86VectorVTInfo<16, EltVT32, VR512>,
595                                 vinsert128_insert, sched>, EVEX_V512;
596
  // 256-bit (4 x 64) into 512-bit; no Predicates override is applied here.
597  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
598                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
599                                 X86VectorVTInfo< 8, EltVT64, VR512>,
600                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;
601
602  // Even with DQI we'd like to only use these instructions for masking.
603  let Predicates = [HasVLX, HasDQI] in
604    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
605                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
606                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
607                                   null_frag, vinsert128_insert, sched>,
608                                   VEX_W1X, EVEX_V256;
609
610  // Even with DQI we'd like to only use these instructions for masking.
611  let Predicates = [HasDQI] in {
612    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
613                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
614                                 X86VectorVTInfo< 8, EltVT64, VR512>,
615                                 null_frag, vinsert128_insert, sched>,
616                                 VEX_W, EVEX_V512;
617
618    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
619                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
620                                   X86VectorVTInfo<16, EltVT32, VR512>,
621                                   null_frag, vinsert256_insert, sched>,
622                                   EVEX_V512;
623  }
624}
625
// Instantiate the float (VINSERTF*) and integer (VINSERTI*) insert families.
626// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
627defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
628defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
629
630// Codegen patterns with the alternative types.
631// Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit-element vectors are lowered to the 32x4 instructions and
// 32-bit-element vectors to the 64x4 instructions.
632defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
633              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
634defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
635              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
636
637defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
638              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
639defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
640              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
641
642defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
643              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
644defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
645              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
646
// The i16 and i8 element vectors below reuse the integer 32x4 / 64x4
// instruction records.
647// Codegen pattern with the alternative types insert VEC128 into VEC256
648defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
649              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
650defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
651              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
652// Codegen pattern with the alternative types insert VEC128 into VEC512
653defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
654              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
655defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
656               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
657// Codegen pattern with the alternative types insert VEC256 into VEC512
658defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
659              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
660defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
661              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
662
663
// Selection-only patterns for a masked subvector insert where a bitcast sits
// between the vselect_mask and the insert: the insert is typed From -> To,
// while the mask, the pass-through value and the result are typed Cast.
//   InstrStr               - base name; the "rrk", "rmk", "rrkz" and "rmkz"
//                            records are selected.
//   From / To              - subvector and full-vector type info of the insert.
//   Cast                   - type info of the masked result (supplies KRCWM,
//                            RC and ImmAllZerosV).
//   vinsert_insert         - pattern fragment to match; the node is bound to
//                            $ins.
//   INSERT_get_vinsert_imm - transform applied to $ins to produce the
//                            immediate operand.
//   p                      - predicates guarding all four patterns.
664multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
665                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
666                                 PatFrag vinsert_insert,
667                                 SDNodeXForm INSERT_get_vinsert_imm,
668                                 list<Predicate> p> {
669let Predicates = p in {
  // Merge-masking, register subvector: masked-off elements come from $src0.
670  def : Pat<(Cast.VT
671             (vselect_mask Cast.KRCWM:$mask,
672                           (bitconvert
673                            (vinsert_insert:$ins (To.VT To.RC:$src1),
674                                                 (From.VT From.RC:$src2),
675                                                 (iPTR imm))),
676                           Cast.RC:$src0)),
677            (!cast<Instruction>(InstrStr#"rrk")
678             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
679             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, subvector loaded from memory. The load is matched directly,
  // exactly as in the zero-masking "rmkz" pattern below.
680  def : Pat<(Cast.VT
681             (vselect_mask Cast.KRCWM:$mask,
682                           (bitconvert
683                            (vinsert_insert:$ins (To.VT To.RC:$src1),
684                                                 (From.VT
685                                                  // No bitconvert here: LdFrag is used as From.VT as-is.
686                                                  (From.LdFrag addr:$src2)),
687                                                 (iPTR imm))),
688                           Cast.RC:$src0)),
689            (!cast<Instruction>(InstrStr#"rmk")
690             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
691             (INSERT_get_vinsert_imm To.RC:$ins))>;
692
  // Zero-masking, register subvector: masked-off elements are zero.
693  def : Pat<(Cast.VT
694             (vselect_mask Cast.KRCWM:$mask,
695                           (bitconvert
696                            (vinsert_insert:$ins (To.VT To.RC:$src1),
697                                                 (From.VT From.RC:$src2),
698                                                 (iPTR imm))),
699                           Cast.ImmAllZerosV)),
700            (!cast<Instruction>(InstrStr#"rrkz")
701             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
702             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, subvector loaded from memory.
703  def : Pat<(Cast.VT
704             (vselect_mask Cast.KRCWM:$mask,
705                           (bitconvert
706                            (vinsert_insert:$ins (To.VT To.RC:$src1),
707                                                 (From.VT (From.LdFrag addr:$src2)),
708                                                 (iPTR imm))),
709                           Cast.ImmAllZerosV)),
710            (!cast<Instruction>(InstrStr#"rmkz")
711             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
712             (INSERT_get_vinsert_imm To.RC:$ins))>;
713}
714}
715
// Masked-insert-through-bitcast patterns with a 256-bit destination.
// The instruction is chosen by the Cast type, because that is the type the
// mask applies to: 32-bit-element casts select the 32x4 records and
// 64-bit-element casts select the 64x2 records (which also need DQI).
716defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
717                             v8f32x_info, vinsert128_insert,
718                             INSERT_get_vinsert128_imm, [HasVLX]>;
719defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
720                             v4f64x_info, vinsert128_insert,
721                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
722
723defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
724                             v8i32x_info, vinsert128_insert,
725                             INSERT_get_vinsert128_imm, [HasVLX]>;
726defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
727                             v8i32x_info, vinsert128_insert,
728                             INSERT_get_vinsert128_imm, [HasVLX]>;
729defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
730                             v8i32x_info, vinsert128_insert,
731                             INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer vectors select the integer VINSERTI64x2Z256 records, matching the
// 512-bit integer instantiations, which use VINSERTI64x2Z.
732defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
733                             v4i64x_info, vinsert128_insert,
734                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
735defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
736                             v4i64x_info, vinsert128_insert,
737                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
738defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
739                             v4i64x_info, vinsert128_insert,
740                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
741
// Masked-insert-through-bitcast patterns with a 512-bit destination.
// The instruction is chosen by the Cast type: 32-bit-element casts select
// the 32xN records and 64-bit-element casts select the 64xN records.

// 128-bit subvector, floating-point types.
742defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
743                             v16f32_info, vinsert128_insert,
744                             INSERT_get_vinsert128_imm, [HasAVX512]>;
745defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
746                             v8f64_info, vinsert128_insert,
747                             INSERT_get_vinsert128_imm, [HasDQI]>;
748
// 128-bit subvector, integer types.
749defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
750                             v16i32_info, vinsert128_insert,
751                             INSERT_get_vinsert128_imm, [HasAVX512]>;
752defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
753                             v16i32_info, vinsert128_insert,
754                             INSERT_get_vinsert128_imm, [HasAVX512]>;
755defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
756                             v16i32_info, vinsert128_insert,
757                             INSERT_get_vinsert128_imm, [HasAVX512]>;
758defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
759                             v8i64_info, vinsert128_insert,
760                             INSERT_get_vinsert128_imm, [HasDQI]>;
761defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
762                             v8i64_info, vinsert128_insert,
763                             INSERT_get_vinsert128_imm, [HasDQI]>;
764defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
765                             v8i64_info, vinsert128_insert,
766                             INSERT_get_vinsert128_imm, [HasDQI]>;
767
// 256-bit subvector, floating-point types.
768defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
769                             v16f32_info, vinsert256_insert,
770                             INSERT_get_vinsert256_imm, [HasDQI]>;
771defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
772                             v8f64_info, vinsert256_insert,
773                             INSERT_get_vinsert256_imm, [HasAVX512]>;
774
// 256-bit subvector, integer types.
775defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
776                             v16i32_info, vinsert256_insert,
777                             INSERT_get_vinsert256_imm, [HasDQI]>;
778defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
779                             v16i32_info, vinsert256_insert,
780                             INSERT_get_vinsert256_imm, [HasDQI]>;
781defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
782                             v16i32_info, vinsert256_insert,
783                             INSERT_get_vinsert256_imm, [HasDQI]>;
784defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
785                             v8i64_info, vinsert256_insert,
786                             INSERT_get_vinsert256_imm, [HasAVX512]>;
787defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
788                             v8i64_info, vinsert256_insert,
789                             INSERT_get_vinsert256_imm, [HasAVX512]>;
790defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
791                             v8i64_info, vinsert256_insert,
792                             INSERT_get_vinsert256_imm, [HasAVX512]>;
793
794// vinsertps - insert an f32 element into an XMM register
// Both forms select X86insertps with a target immediate ($src3) and are
// placed in the SSEPackedSingle execution domain.
795let ExeDomain = SSEPackedSingle in {
// isCommutable is set only on the register-register form: this brace-less
// `let ... in` covers just the next def.
796let isCommutable = 1 in
797def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
798      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
799      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
800      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
801      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the second source is a scalar f32 load wrapped in
// scalar_to_vector, folded into the instruction's f32mem operand.
802def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
803      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
804      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
805      [(set VR128X:$dst, (X86insertps VR128X:$src1,
806                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
807                          timm:$src3))]>,
808      EVEX_4V, EVEX_CD8<32, CD8VT1>,
809      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
810}
811
812//===----------------------------------------------------------------------===//
813// AVX-512 VECTOR EXTRACT
814//---
815
816// Supports two different pattern operators for mask and unmasked ops. Allows
817// null_frag to be passed for one.
// Defines the subvector-extract records for one From -> To size pair:
//   rr  - register destination, built through AVX512_maskable_split (defined
//         elsewhere in this file), which receives both pattern operators.
//   mr  - store of the extracted subvector to memory.
//   mrk - masked store form; it has no selection pattern.
// Parameters:
//   Opcode            - instruction opcode shared by all forms.
//   From / To         - type info for the source vector and the extracted
//                       subvector.
//   vextract_extract  - pattern operator used for the first rr pattern and
//                       for the mr store pattern.
//   vextract_for_mask - pattern operator used for the second rr pattern.
//   SchedRR / SchedMR - scheduling classes for register and store forms.
818multiclass vextract_for_size_split<int Opcode,
819                                   X86VectorVTInfo From, X86VectorVTInfo To,
820                                   SDPatternOperator vextract_extract,
821                                   SDPatternOperator vextract_for_mask,
822                                   SchedWrite SchedRR, SchedWrite SchedMR> {
823
824  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // The mnemonic is built from the destination type:
    // "vextract" # EltTypeName # "x" # NumElts.
825    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
826                (ins From.RC:$src1, u8imm:$idx),
827                "vextract" # To.EltTypeName # "x" # To.NumElts,
828                "$idx, $src1", "$src1, $idx",
829                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
830                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
831                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
832
    // Unmasked extract-and-store; selected from a store of the extract node.
833    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
834                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
835                    "vextract" # To.EltTypeName # "x" # To.NumElts #
836                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
837                    [(store (To.VT (vextract_extract:$idx
838                                    (From.VT From.RC:$src1), (iPTR imm))),
839                             addr:$dst)]>, EVEX,
840                    Sched<[SchedMR]>;
841
    // Masked extract-and-store. The pattern list is empty, so mayStore is
    // stated explicitly; the record is also tagged NotMemoryFoldable.
842    let mayStore = 1, hasSideEffects = 0 in
843    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
844                    (ins To.MemOp:$dst, To.KRCWM:$mask,
845                                        From.RC:$src1, u8imm:$idx),
846                     "vextract" # To.EltTypeName # "x" # To.NumElts #
847                          "\t{$idx, $src1, $dst {${mask}}|"
848                          "$dst {${mask}}, $src1, $idx}", []>,
849                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
850  }
851}
852
853// Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper: forwards every argument to vextract_for_size_split and
// supplies vextract_extract for both of its pattern-operator arguments.
854multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
855                             X86VectorVTInfo To,
856                             SDPatternOperator vextract_extract,
857                             SchedWrite SchedRR, SchedWrite SchedMR> :
858  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
859
860// Codegen pattern for the alternative types
// Selection-only patterns (no instruction records are defined here) that map
// vextract_extract of a To-typed subvector out of a From-typed vector onto
// the instruction records named InstrStr#"rr" and InstrStr#"mr".
//   InstrStr                 - base name of the instruction records to select.
//   From / To                - type info for the source vector and the
//                              extracted subvector.
//   vextract_extract         - pattern fragment to match; the node is bound
//                              to $ext.
//   EXTRACT_get_vextract_imm - transform applied to $ext to produce the
//                              immediate operand.
//   p                        - predicates guarding both patterns.
861multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
862                X86VectorVTInfo To, PatFrag vextract_extract,
863                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
864  let Predicates = p in {
     // Extract into a register.
865     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
866               (To.VT (!cast<Instruction>(InstrStr#"rr")
867                          From.RC:$src1,
868                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract whose result is stored straight to memory.
869     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
870                              (iPTR imm))), addr:$dst),
871               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
872                (EXTRACT_get_vextract_imm To.RC:$ext))>;
873  }
874}
875
// Instantiates the subvector-extract instruction families for one pair of
// element types. NAME is the prefix supplied by the defm that uses this
// multiclass.
//   EltVT32 / EltVT64 - element types used for the 32xN and 64xN variants.
//   Opcode128         - opcode for variants that extract into VR128X
//                       (32x4 and 64x2).
//   Opcode256         - opcode for variants that extract into VR256X
//                       (64x4 and 32x8).
//   SchedRR / SchedMR - scheduling classes forwarded to every variant.
876multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
877                             ValueType EltVT64, int Opcode256,
878                             SchedWrite SchedRR, SchedWrite SchedMR> {
  // Extracts from a 512-bit source.
879  let Predicates = [HasAVX512] in {
880    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
881                                   X86VectorVTInfo<16, EltVT32, VR512>,
882                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
883                                   vextract128_extract, SchedRR, SchedMR>,
884                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
885    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
886                                   X86VectorVTInfo< 8, EltVT64, VR512>,
887                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
888                                   vextract256_extract, SchedRR, SchedMR>,
889                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
890  }
  // Extract from a 256-bit source; needs VLX.
891  let Predicates = [HasVLX] in
892    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
893                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
894                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
895                                 vextract128_extract, SchedRR, SchedMR>,
896                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;
897
  // The DQI-only variants below pass null_frag as the unmasked pattern
  // operator and the real fragment as the mask operator.
898  // Even with DQI we'd like to only use these instructions for masking.
899  let Predicates = [HasVLX, HasDQI] in
900    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
901                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
902                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
903                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
904                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
905
906  // Even with DQI we'd like to only use these instructions for masking.
907  let Predicates = [HasDQI] in {
908    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
909                                 X86VectorVTInfo< 8, EltVT64, VR512>,
910                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
911                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
912                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
913    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
914                                 X86VectorVTInfo<16, EltVT32, VR512>,
915                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
916                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
917                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
918  }
919}
920
// Instantiate the float (VEXTRACTF*) and integer (VEXTRACTI*) extract
// families.
921// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
922defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
923defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
924
925// extract_subvector codegen patterns with the alternative types.
926// Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit-element vectors are lowered to the 32x4 instructions and
// 32-bit-element vectors to the 64x4 instructions.
927defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
928          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
929defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
930          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
931
932defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
933          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
934defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
935          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
936
937defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
938          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
939defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
940          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
941
// The i16 and i8 element vectors below reuse the integer 32x4 / 64x4
// instruction records.
942// Codegen pattern with the alternative types extract VEC128 from VEC256
943defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
944          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
945defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
946          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
947
948// Codegen pattern with the alternative types extract VEC128 from VEC512
949defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
950                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
951defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
952                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
953// Codegen pattern with the alternative types extract VEC256 from VEC512
954defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
955                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
956defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
957                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
958
959
960// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
961// smaller extract to enable EVEX->VEX.
// Without VLX: take the sub_ymm sub-register of the 512-bit source and select
// VEXTRACTI128rr / VEXTRACTF128rr on it with index 1. The matched
// extract_subvector index is the element offset of bit 128 for each type
// (2 for 64-bit, 4 for 32-bit, 8 for 16-bit, 16 for 8-bit elements).
962let Predicates = [NoVLX] in {
963def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
964          (v2i64 (VEXTRACTI128rr
965                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
966                  (iPTR 1)))>;
967def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
968          (v2f64 (VEXTRACTF128rr
969                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
970                  (iPTR 1)))>;
971def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
972          (v4i32 (VEXTRACTI128rr
973                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
974                  (iPTR 1)))>;
975def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
976          (v4f32 (VEXTRACTF128rr
977                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
978                  (iPTR 1)))>;
979def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
980          (v8i16 (VEXTRACTI128rr
981                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
982                  (iPTR 1)))>;
983def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
984          (v16i8 (VEXTRACTI128rr
985                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
986                  (iPTR 1)))>;
987}
988
989// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
990// smaller extract to enable EVEX->VEX.
// With VLX: same shape as the NoVLX patterns, but the 256-bit
// VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr records are selected on the
// sub_ymm sub-register with index 1. All integer element widths use the
// I32x4 record.
991let Predicates = [HasVLX] in {
992def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
993          (v2i64 (VEXTRACTI32x4Z256rr
994                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
995                  (iPTR 1)))>;
996def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
997          (v2f64 (VEXTRACTF32x4Z256rr
998                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
999                  (iPTR 1)))>;
1000def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1001          (v4i32 (VEXTRACTI32x4Z256rr
1002                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1003                  (iPTR 1)))>;
1004def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1005          (v4f32 (VEXTRACTF32x4Z256rr
1006                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1007                  (iPTR 1)))>;
1008def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1009          (v8i16 (VEXTRACTI32x4Z256rr
1010                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1011                  (iPTR 1)))>;
1012def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1013          (v16i8 (VEXTRACTI32x4Z256rr
1014                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1015                  (iPTR 1)))>;
1016}
1017
1018
1019// Additional patterns for handling a bitcast between the vselect and the
1020// extract_subvector.
// The extract is typed From -> To, while the mask, the pass-through value and
// the result are typed Cast.
//   InstrStr                 - base name; the "rrk" and "rrkz" records are
//                              selected.
//   From / To                - type info for the source vector and the
//                              extracted subvector.
//   Cast                     - type info of the masked result (supplies
//                              KRCWM, RC and ImmAllZerosV).
//   vextract_extract         - pattern fragment to match; the node is bound
//                              to $ext.
//   EXTRACT_get_vextract_imm - transform applied to $ext to produce the
//                              immediate operand.
//   p                        - predicates guarding both patterns.
1021multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1022                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
1023                                  PatFrag vextract_extract,
1024                                  SDNodeXForm EXTRACT_get_vextract_imm,
1025                                  list<Predicate> p> {
1026let Predicates = p in {
  // Merge-masking: masked-off elements come from $src0. $src0 is a Cast.VT
  // value, so it is named with Cast.RC in both the source and the result.
1027  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1028                                   (bitconvert
1029                                    (To.VT (vextract_extract:$ext
1030                                            (From.VT From.RC:$src), (iPTR imm)))),
1031                                   Cast.RC:$src0)),
1032            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1033                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1034                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1035
  // Zero-masking: masked-off elements are zero.
1036  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1037                                   (bitconvert
1038                                    (To.VT (vextract_extract:$ext
1039                                            (From.VT From.RC:$src), (iPTR imm)))),
1040                                   Cast.ImmAllZerosV)),
1041            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1042                      Cast.KRCWM:$mask, From.RC:$src,
1043                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1044}
1045}
1046
// Masked-extract-through-bitcast patterns. The instruction is chosen by the
// Cast type: 32-bit-element casts select the 32xN records and
// 64-bit-element casts select the 64xN records.

// 128-bit extract from a 256-bit source, floating-point types.
1047defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1048                              v4f32x_info, vextract128_extract,
1049                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1050defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1051                              v2f64x_info, vextract128_extract,
1052                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1053
// 128-bit extract from a 256-bit source, integer types.
1054defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1055                              v4i32x_info, vextract128_extract,
1056                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1057defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1058                              v4i32x_info, vextract128_extract,
1059                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1060defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1061                              v4i32x_info, vextract128_extract,
1062                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1063defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1064                              v2i64x_info, vextract128_extract,
1065                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1066defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1067                              v2i64x_info, vextract128_extract,
1068                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1069defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1070                              v2i64x_info, vextract128_extract,
1071                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1072
// 128-bit extract from a 512-bit source, floating-point types.
1073defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1074                              v4f32x_info, vextract128_extract,
1075                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1076defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1077                              v2f64x_info, vextract128_extract,
1078                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1079
// 128-bit extract from a 512-bit source, integer types.
1080defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1081                              v4i32x_info, vextract128_extract,
1082                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1083defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1084                              v4i32x_info, vextract128_extract,
1085                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1086defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1087                              v4i32x_info, vextract128_extract,
1088                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1089defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1090                              v2i64x_info, vextract128_extract,
1091                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1092defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1093                              v2i64x_info, vextract128_extract,
1094                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1095defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1096                              v2i64x_info, vextract128_extract,
1097                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1098
// 256-bit extract from a 512-bit source, floating-point types.
1099defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1100                              v8f32x_info, vextract256_extract,
1101                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1102defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1103                              v4f64x_info, vextract256_extract,
1104                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1105
// 256-bit extract from a 512-bit source, integer types.
1106defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1107                              v8i32x_info, vextract256_extract,
1108                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1109defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1110                              v8i32x_info, vextract256_extract,
1111                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1112defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1113                              v8i32x_info, vextract256_extract,
1114                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1115defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1116                              v4i64x_info, vextract256_extract,
1117                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1118defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1119                              v4i64x_info, vextract256_extract,
1120                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1121defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1122                              v4i64x_info, vextract256_extract,
1123                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1124
// vextractps - extract 32 bits from XMM
// Register form: $src1 is viewed as v4i32 and the element selected by the
// 8-bit immediate $src2 is written to a GR32 destination.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1131
// Memory form of vextractps: same element selection as the register form
// (bitcast of $src1 to v4i32, index in $src2), but the extracted value is
// stored to the f32mem operand $dst, so the instruction has no outputs.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1138
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
//
// Pattern-only multiclass: it defines no instructions.  It maps an
// X86VBroadcast of a scalar held in SrcInfo.FRC onto the rr/rrk/rrkz
// instructions named Name # DestInfo.ZSuffix # <form>, first moving the scalar
// into SrcInfo.RC with COPY_TO_REGCLASS.  opc and OpcodeStr are accepted but
// not referenced in the body.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            string Name,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Merge-masked broadcast: unselected lanes come from $src0.
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Zero-masked broadcast: unselected lanes are DestInfo.ImmAllZerosV.
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
1161
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
//
// Defines six instructions: rr/rrkz/rrk read the scalar from SrcInfo.RC and
// rm/rmkz/rmk read it from SrcInfo.ScalarMemOp.  The broadcast is produced as
// DestInfo.VT and bitconverted to MaskInfo.VT, which is the type the write
// mask and the result register class are taken from.
//
//   SchedRR / SchedRM            - scheduling classes for the register and
//                                  memory forms respectively.
//   IsConvertibleToThreeAddress  - applied to the rmk form only.
//   UnmaskedOp                   - node used by the unmasked rr pattern only;
//                                  the masked register forms always use
//                                  X86VBroadcast.
//   UnmaskedBcastOp              - fragment used by the unmasked rm pattern
//                                  only; the masked memory forms always use
//                                  SrcInfo.BroadcastLdFrag.
//   Name                         - not referenced in the body.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Register source, unmasked.
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Register source, zero-masked.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set MaskInfo.RC:$dst,
                         (vselect_mask MaskInfo.KRCWM:$mask,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                          MaskInfo.ImmAllZerosV))],
                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Register source, merge-masked; $src0 is tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                     "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Memory source, unmasked.
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, zero-masked.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set MaskInfo.RC:$dst,
                         (vselect_mask MaskInfo.KRCWM:$mask,
                          (MaskInfo.VT
                           (bitconvert
                            (DestInfo.VT
                             (SrcInfo.BroadcastLdFrag addr:$src)))),
                          MaskInfo.ImmAllZerosV))],
                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  // Memory source, merge-masked; $src0 is tied to $dst.
  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                     "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
1252
// Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo supplied for both the
// MaskInfo and DestInfo parameters; UnmaskedOp and UnmaskedBcastOp keep their
// defaults.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;
1262
// Scalar FP broadcast for the "sd" flavour: defines the 512-bit (Z, HasAVX512)
// and 256-bit (Z256, HasVLX) variants.  Each variant combines the instruction
// definitions from avx512_broadcast_rm with the scalar-register patterns from
// avx512_broadcast_scalar; the source is always _.info128.  No Z128 variant is
// defined here.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}
1281
// Scalar FP broadcast for the "ss" flavour: same structure as
// avx512_fp_broadcast_sd, plus a 128-bit (Z128) variant under HasVLX.  All
// three widths use the WriteFShuffle256 / WriteFShuffle256Ld scheduling
// classes and take their source from _.info128.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
// Scalar FP broadcasts: vbroadcastss (opcode 0x18, f32) and vbroadcastsd
// (opcode 0x19, f64, tagged VEX_W1X).
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;
1309
// Integer broadcast from a general-purpose register of class SrcRC.  The
// mnemonic is "vpbroadcast" followed by _.Suffix, and the maskable variants
// are generated by AVX512_maskable from the single OpNode pattern.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  // (vselect is passed as the final AVX512_maskable argument for that reason.)
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins SrcRC:$src),
                          "vpbroadcast"#_.Suffix, "$src", "$src",
                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                          T8PD, EVEX, Sched<[SchedRR]>;
}
1322
// Integer broadcast from a GPR for element types narrower than 32 bits.
// The instructions are declared with a GR32 source operand and no built-in
// patterns (the three pattern lists passed to AVX512_maskable_custom are
// empty).  Selection is done by the explicit patterns below, which place the
// SrcRC value into an undefined i32 at sub-register index Subreg before
// handing it to the instruction named Name # {rr,rrk,rrkz}.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                         (outs _.RC:$dst), (ins GR32:$src),
                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked broadcast.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked form of the same fold.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1347
// Vector-length wrapper around avx512_int_broadcastbw_reg.  Z requires prd
// only; Z256 and Z128 additionally require HasVLX.  The Name string is
// extended with the width suffix so the inner patterns can look up the
// matching instruction records.  Z and Z256 use WriteShuffle256, Z128 uses
// WriteShuffle.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1361
// Vector-length wrapper around avx512_int_broadcast_reg.  Z requires prd only;
// Z256 and Z128 additionally require HasVLX.  Z and Z256 use WriteShuffle256,
// Z128 uses WriteShuffle.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1375
// Integer broadcasts from a general-purpose register.  The byte and word forms
// (GR8/GR16 source, HasBWI) go through the GR32-based multiclass with the
// corresponding sub-register index; the dword and qword forms (GR32/GR64
// source, HasAVX512) share opcode 0x7C and the qword form adds VEX_W.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1385
// Vector-length wrapper for integer broadcasts whose source is the low element
// of a vector register or a scalar memory operand (via avx512_broadcast_rm).
// The source info is always _.info128.  Z requires prd only; Z256 and Z128
// additionally require HasVLX.  IsConvertibleToThreeAddress is forwarded
// unchanged to every width.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd,
                                        bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128,
                                   IsConvertibleToThreeAddress>,
                                  EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                 EVEX_V128;
  }
}
1406
// Integer broadcasts from a vector register or memory.  The byte and word
// forms require HasBWI and pass 0 for IsConvertibleToThreeAddress; the dword
// and qword forms require HasAVX512 and pass 1.  The qword form is tagged
// VEX_W1X.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1415
// Subvector broadcast from memory: loads a _Src.VT vector with _Src.LdFrag and
// broadcasts it to _Dst.VT via X86SubVBroadcast.  Only a memory form ("rm") is
// defined; AVX512_maskable supplies the masked variants.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1425
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// The unmasked pattern slot of AVX512_maskable_split is null_frag; because
// that leaves the unmasked instruction without a pattern, hasSideEffects and
// mayLoad are set explicitly.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (_Src.LdFrag addr:$src))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1440
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

// 512-bit subvector broadcasts from memory.  The 32x4 forms broadcast a
// 4 x 32-bit source into 16 elements; the 64x4 forms broadcast a 4 x 64-bit
// source into 8 elements and carry VEX_W.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1457
// Extra selection patterns for the 512-bit subvector broadcasts that only
// need HasAVX512.
let Predicates = [HasAVX512] in {
// Unmasked 256-bit loads of other element types reuse the 64x4 instructions.
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half with INSERT_SUBREG and
// inserted again at index 1 with VINSERT*64x4.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// Unmasked 128-bit loads of other element types reuse the 32x4 instructions.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1533
// 256-bit subvector broadcasts (128-bit source) and their extra selection
// patterns, available with HasVLX.
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Unmasked 128-bit loads of other element types reuse the 32x4 instructions.
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half with INSERT_SUBREG and
// inserted again at index 1 with VINSERT*32x4.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
1591
// 256-bit 64x2 subvector broadcasts, which need both HasVLX and HasDQI.  They
// use the _dq multiclass, so only masked uses select these instructions.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1618
// 512-bit 64x2 and 32x8 subvector broadcasts, which need HasDQI.  They use the
// _dq multiclass, so only masked uses select these instructions.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1668
// 32x2 broadcasts for the 512-bit (HasDQI) and 256-bit (HasDQI + HasVLX)
// widths.  Uses avx512_broadcast_rm_split with _Dst as the mask type and _Src
// as the broadcast type, passes 0 for IsConvertibleToThreeAddress and
// null_frag for both unmasked operators, so the unmasked rr/rm instructions
// get no selection pattern.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V256;
}
1682
// Integer 32x2 broadcast: inherits the Z and Z256 variants from
// avx512_common_broadcast_32x2 and adds a 128-bit (Z128) variant under
// HasDQI + HasVLX, using the WriteShuffle / WriteShuffleXLd scheduling classes.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V128;
}
1693
// The integer form goes through the i32x2 multiclass and therefore gets Z,
// Z256 and Z128 variants; the FP form uses the base multiclass and gets only
// Z and Z256.  In both, the destination info is 32-bit-element and the source
// info is 64-bit-element.
1694defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1695                                          avx512vl_i32_info, avx512vl_i64_info>;
1696defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1697                                          avx512vl_f32_info, avx512vl_f64_info>;
1698
1699//===----------------------------------------------------------------------===//
1700// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1701//---
// Defines a single register-register instruction `rr` whose only input is a
// mask register (KRC:$src) and whose output is a vector register (_.RC:$dst).
// It is selected for (_.VT (X86VBroadcastm KRC:$src)), is EVEX-encoded, and is
// scheduled as WriteShuffle.
1702multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1703                                  X86VectorVTInfo _, RegisterClass KRC> {
1704  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1705                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1706                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1707                  EVEX, Sched<[WriteShuffle]>;
1708}
1709
// Expands avx512_mask_broadcastm over the three vector widths.  The 512-bit
// variant (Z) requires HasCDI; the 256-bit and 128-bit variants (Z256, Z128)
// additionally require HasVLX.  The same mask register class KRC is used for
// every width.
1710multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1711                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1712  let Predicates = [HasCDI] in
1713    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1714  let Predicates = [HasCDI, HasVLX] in {
1715    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1716    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1717  }
1718}
1719
// VPBROADCASTMW2D takes a VK16 mask and produces i32-element vectors;
// VPBROADCASTMB2Q takes a VK8 mask, produces i64-element vectors, and sets
// VEX_W.
1720defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1721                                               avx512vl_i32_info, VK16>;
1722defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1723                                               avx512vl_i64_info, VK8>, VEX_W;
1724
1725//===----------------------------------------------------------------------===//
1726// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of a VPERMI2-style
// instruction, built on AVX512_maskable_3src_cast (defined elsewhere).
//
// $src1 is tied to $dst via the Constraints string.  It is not listed in the
// `ins` dags here (presumably the helper multiclass adds it - confirm against
// its definition).  In the X86VPermt2 pattern, $src1 is the *index* operand
// (IdxVT.RC:$src1), sitting between the data operands $src2 and $src3.
//
// The rm form reads $src3 through _.LdFrag and is marked mayLoad; it uses the
// folded scheduling classes.
1727multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1728                         X86FoldableSchedWrite sched,
1729                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1730let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1731    hasSideEffects = 0 in {
1732  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1733          (ins _.RC:$src2, _.RC:$src3),
1734          OpcodeStr, "$src3, $src2", "$src2, $src3",
1735          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1736          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1737
1738  let mayLoad = 1 in
1739  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1740            (ins _.RC:$src2, _.MemOp:$src3),
1741            OpcodeStr, "$src3, $src2", "$src2, $src3",
1742            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1743                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1744            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1745  }
1746}
1747
// Broadcast-memory form (rmb) of a VPERMI2-style instruction.  $src3 is a
// scalar memory operand (_.ScalarMemOp) matched through _.BroadcastLdFrag, the
// assembly strings append _.BroadcastStr to $src3, and EVEX_B is set.
// As in avx512_perm_i, $src1 is tied to $dst and is the index operand of
// X86VPermt2.
1748multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1749                            X86FoldableSchedWrite sched,
1750                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1751  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1752      hasSideEffects = 0, mayLoad = 1 in
1753  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1754              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1755              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1756              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1757              (_.VT (X86VPermt2 _.RC:$src2,
1758               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1759              AVX5128IBase, EVEX_4V, EVEX_B,
1760              Sched<[sched.Folded, sched.ReadAfterFold]>;
1761}
1762
// Expands avx512_perm_i plus avx512_perm_i_mb (so rr, rm and rmb forms) over
// three widths.  The 512-bit records use NAME directly and carry no predicate
// from this multiclass; the 128-bit and 256-bit records are named NAME#128 and
// NAME#256 and require HasVLX.  ShuffleMask supplies the index vector type.
1763multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1764                               X86FoldableSchedWrite sched,
1765                               AVX512VLVectorVTInfo VTInfo,
1766                               AVX512VLVectorVTInfo ShuffleMask> {
1767  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1768                           ShuffleMask.info512>,
1769            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1770                             ShuffleMask.info512>, EVEX_V512;
1771  let Predicates = [HasVLX] in {
1772  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1773                               ShuffleMask.info128>,
1774                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1775                                  ShuffleMask.info128>, EVEX_V128;
1776  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1777                               ShuffleMask.info256>,
1778                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1779                                  ShuffleMask.info256>, EVEX_V256;
1780  }
1781}
1782
// Like avx512_perm_i_sizes, but only instantiates avx512_perm_i (rr and rm;
// no broadcast rmb form) and takes the feature predicate as a parameter.
// The 512-bit records require Prd; the 128/256-bit records require Prd and
// HasVLX.
1783multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1784                                  X86FoldableSchedWrite sched,
1785                                  AVX512VLVectorVTInfo VTInfo,
1786                                  AVX512VLVectorVTInfo Idx,
1787                                  Predicate Prd> {
1788  let Predicates = [Prd] in
1789  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1790                           Idx.info512>, EVEX_V512;
1791  let Predicates = [Prd, HasVLX] in {
1792  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793                               Idx.info128>, EVEX_V128;
1794  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1795                               Idx.info256>,  EVEX_V256;
1796  }
1797}
1798
// VPERMI2 instantiations.
//   D / Q / PS / PD use avx512_perm_i_sizes (rr, rm and broadcast rmb forms).
//   W / B use avx512_perm_i_sizes_bw, gated on HasBWI / HasVBMI respectively.
// The 64-bit-element variants (Q, PD) and the W variant add VEX_W.  The FP
// variants pair an FP data type with the same-width integer index type.
1799defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1800                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1801defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1802                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1803defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1804                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1805                  VEX_W, EVEX_CD8<16, CD8VF>;
1806defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1807                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1808                  EVEX_CD8<8, CD8VF>;
1809defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1810                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1811defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1812                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1813
1814// Extra patterns to deal with extra bitcasts due to passthru and index being
1815// different types on the fp versions.
//
// Each pattern matches a masked select whose true value is an X86VPermt2 and
// whose passthru is $src1 bitcast from CastVT to _.VT, where the permute's
// index operand is the same $src1 bitcast from CastVT to IdxVT.  The three
// patterns differ only in the third permute operand and select the
// merge-masked instruction named InstrStr + suffix:
//   register operand        -> "rrk"
//   _.LdFrag load           -> "rmk"
//   _.BroadcastLdFrag load  -> "rmbk"
1816multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1817                                  X86VectorVTInfo IdxVT,
1818                                  X86VectorVTInfo CastVT> {
1819  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1820                                (X86VPermt2 (_.VT _.RC:$src2),
1821                                            (IdxVT.VT (bitconvert
1822                                                       (CastVT.VT _.RC:$src1))),
1823                                            _.RC:$src3),
1824                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1825            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1826                                                _.RC:$src2, _.RC:$src3)>;
1827  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1828                                (X86VPermt2 _.RC:$src2,
1829                                            (IdxVT.VT (bitconvert
1830                                                       (CastVT.VT _.RC:$src1))),
1831                                            (_.LdFrag addr:$src3)),
1832                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1833            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1834                                                _.RC:$src2, addr:$src3)>;
1835  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1836                                 (X86VPermt2 _.RC:$src2,
1837                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1838                                             (_.BroadcastLdFrag addr:$src3)),
1839                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1840            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1841                                                 _.RC:$src2, addr:$src3)>;
1842}
1843
1844// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the VPERMI2PS family is covered here, with a vXi64 cast type at each of
// the three widths (512 / 256 / 128).
1845defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1846defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1847defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1848
1849// VPERMT2
// Register (rr) and full-vector memory (rm) forms, built on
// AVX512_maskable_3src (defined elsewhere).  In contrast to avx512_perm_i,
// the tied operand $src1 (= $dst) is the *first data* operand of X86VPermt2,
// and the index operand is the explicit input IdxVT.RC:$src2.
// This multiclass does not set hasSideEffects or mayLoad itself.
1850multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1851                         X86FoldableSchedWrite sched,
1852                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1853let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1854  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1855          (ins IdxVT.RC:$src2, _.RC:$src3),
1856          OpcodeStr, "$src3, $src2", "$src2, $src3",
1857          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1858          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1859
1860  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1861            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1862            OpcodeStr, "$src3, $src2", "$src2, $src3",
1863            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1864                   (_.LdFrag addr:$src3))), 1>,
1865            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1866  }
1867}
// Broadcast-memory form (rmb) of a VPERMT2-style instruction.  $src3 is a
// scalar memory operand matched through _.BroadcastLdFrag, the assembly
// strings append _.BroadcastStr to $src3, and EVEX_B is set.  Operand roles
// match avx512_perm_t: tied $src1 is the first data operand, $src2 is the
// index.
1868multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1869                            X86FoldableSchedWrite sched,
1870                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1871  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1872  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1873              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1874              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1875              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1876              (_.VT (X86VPermt2 _.RC:$src1,
1877               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1878              AVX5128IBase, EVEX_4V, EVEX_B,
1879              Sched<[sched.Folded, sched.ReadAfterFold]>;
1880}
1881
// Expands avx512_perm_t plus avx512_perm_t_mb (rr, rm and rmb forms) over
// three widths.  The 512-bit records use NAME and carry no predicate from this
// multiclass; NAME#128 and NAME#256 require HasVLX.  ShuffleMask supplies the
// index vector type.
1882multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1883                               X86FoldableSchedWrite sched,
1884                               AVX512VLVectorVTInfo VTInfo,
1885                               AVX512VLVectorVTInfo ShuffleMask> {
1886  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1887                              ShuffleMask.info512>,
1888            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1889                              ShuffleMask.info512>, EVEX_V512;
1890  let Predicates = [HasVLX] in {
1891  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1892                              ShuffleMask.info128>,
1893                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1894                              ShuffleMask.info128>, EVEX_V128;
1895  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1896                              ShuffleMask.info256>,
1897                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1898                              ShuffleMask.info256>, EVEX_V256;
1899  }
1900}
1901
// Like avx512_perm_t_sizes, but only instantiates avx512_perm_t (rr and rm;
// no broadcast rmb form) and takes the feature predicate as a parameter.
// The 512-bit records require Prd; the 128/256-bit records require Prd and
// HasVLX.
1902multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1903                                  X86FoldableSchedWrite sched,
1904                                  AVX512VLVectorVTInfo VTInfo,
1905                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1906  let Predicates = [Prd] in
1907  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1908                           Idx.info512>, EVEX_V512;
1909  let Predicates = [Prd, HasVLX] in {
1910  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1911                               Idx.info128>, EVEX_V128;
1912  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913                               Idx.info256>, EVEX_V256;
1914  }
1915}
1916
// VPERMT2 instantiations, parallel to the VPERMI2 set.
//   D / Q / PS / PD use avx512_perm_t_sizes (rr, rm and broadcast rmb forms).
//   W / B use avx512_perm_t_sizes_bw, gated on HasBWI / HasVBMI respectively.
// The 64-bit-element variants (Q, PD) and the W variant add VEX_W.
1917defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1918                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1919defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1920                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1921defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1922                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1923                  VEX_W, EVEX_CD8<16, CD8VF>;
1924defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1925                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1926                  EVEX_CD8<8, CD8VF>;
1927defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1928                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1929defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1930                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1931
1932//===----------------------------------------------------------------------===//
1933// AVX-512 - BLEND using mask
1934//
1935
// Defines the six non-broadcast forms of a mask-blend instruction:
//   rr / rrk / rrkz  - register source, unmasked / merge-masked / zero-masked
//   rm / rmk / rmkz  - full-vector memory source, same three masking modes
// Every record has an empty selection pattern ([]), so nothing here is
// matched by ISel directly; hasSideEffects = 0 (and mayLoad = 1 on the memory
// forms) is therefore stated explicitly.  The zero-masked forms (rrkz, rmkz)
// are additionally tagged NotMemoryFoldable.  Memory forms set
// EVEX_CD8<_.EltSize, CD8VF> and use the folded scheduling classes.
1936multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1937                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1938  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1939  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1940             (ins _.RC:$src1, _.RC:$src2),
1941             !strconcat(OpcodeStr,
1942             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1943             EVEX_4V, Sched<[sched]>;
1944  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1945             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1946             !strconcat(OpcodeStr,
1947             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1948             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1949  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1950             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1951             !strconcat(OpcodeStr,
1952             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1953             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1954  let mayLoad = 1 in {
1955  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1956             (ins _.RC:$src1, _.MemOp:$src2),
1957             !strconcat(OpcodeStr,
1958             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1959             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1960             Sched<[sched.Folded, sched.ReadAfterFold]>;
1961  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1962             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1963             !strconcat(OpcodeStr,
1964             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1966             Sched<[sched.Folded, sched.ReadAfterFold]>;
1967  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1968             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1969             !strconcat(OpcodeStr,
1970             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1971             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1972             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1973  }
1974  }
1975}
// Defines the three broadcast-memory forms of a mask-blend instruction:
// rmbk (merge-masked), rmbkz (zero-masked, NotMemoryFoldable) and rmb
// (unmasked).  $src2 is a scalar memory operand, the assembly strings append
// _.BroadcastStr to it, and EVEX_B is set.  All three have empty selection
// patterns, with mayLoad = 1 and hasSideEffects = 0 set explicitly.
1976multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1977                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1978  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1979  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1980      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1981       !strconcat(OpcodeStr,
1982            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1983            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1984      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1985      Sched<[sched.Folded, sched.ReadAfterFold]>;
1986
1987  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1988      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1989       !strconcat(OpcodeStr,
1990            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1991            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1992      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1993      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1994
1995  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1996      (ins _.RC:$src1, _.ScalarMemOp:$src2),
1997       !strconcat(OpcodeStr,
1998            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1999            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2000      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2001      Sched<[sched.Folded, sched.ReadAfterFold]>;
2002  }
2003}
2004
// Expands both WriteFVarBlendask and WriteFVarBlendask_rmb (so the broadcast
// forms are included) over three widths, picking sched.ZMM / YMM / XMM to
// match.  The 512-bit variant carries no predicate from this multiclass; the
// 256-bit and 128-bit variants require HasVLX.
2005multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2006                        AVX512VLVectorVTInfo VTInfo> {
2007  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2008           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2009                                 EVEX_V512;
2010
2011  let Predicates = [HasVLX] in {
2012    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2013                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2014                                      EVEX_V256;
2015    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2016                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2017                                      EVEX_V128;
2018  }
2019}
2020
// Expands WriteFVarBlendask only (no broadcast forms) over three widths.
// The 512-bit variant requires HasBWI; the 256-bit and 128-bit variants
// require HasBWI and HasVLX.
2021multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2022                        AVX512VLVectorVTInfo VTInfo> {
2023  let Predicates = [HasBWI] in
2024    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025                               EVEX_V512;
2026
2027  let Predicates = [HasBWI, HasVLX] in {
2028    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2029                                  EVEX_V256;
2030    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2031                                  EVEX_V128;
2032  }
2033}
2034
// Mask-blend instantiations.  The 32/64-bit element variants (PS, PD, D, Q)
// use blendmask_dq and so include broadcast forms; the byte/word variants
// (B, W) use blendmask_bw.  FP variants use SchedWriteFVarBlend, integer
// variants SchedWriteVarBlend.  PD, Q and W add VEX_W.
2035defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2036                              avx512vl_f32_info>;
2037defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2038                              avx512vl_f64_info>, VEX_W;
2039defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2040                              avx512vl_i32_info>;
2041defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2042                              avx512vl_i64_info>, VEX_W;
2043defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2044                              avx512vl_i8_info>;
2045defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2046                              avx512vl_i16_info>, VEX_W;
2047
2048//===----------------------------------------------------------------------===//
2049// Compare Instructions
2050//===----------------------------------------------------------------------===//
2051
2052// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2053
// Scalar FP compare producing a mask register (_.KRC:$dst), opcode 0xC2, with
// the comparison predicate supplied as an 8-bit immediate ($cc).
//
// Records defined:
//   rr_Int  - vector-register operands (_.RC), matched via OpNode / OpNode_su.
//   rm_Int  - second operand from memory (_.IntScalarMemOp), mayLoad = 1.
//   rrb_Int - {sae} register form matched via OpNodeSAE / OpNodeSAE_su; sets
//             EVEX_B and Uses = [MXCSR], and does not carry SIMD_EXC.
//   rr, rm  - isCodeGenOnly forms on scalar FP register class (_.FRC); rr is
//             marked isCommutable, rm loads through _.ScalarLdFrag.
// All forms except rrb_Int are tagged SIMD_EXC.
2054multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2055                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2056                             X86FoldableSchedWrite sched> {
2057  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2058                      (outs _.KRC:$dst),
2059                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2060                      "vcmp"#_.Suffix,
2061                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2062                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2063                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2064                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2065  let mayLoad = 1 in
2066  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2067                    (outs _.KRC:$dst),
2068                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2069                    "vcmp"#_.Suffix,
2070                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2071                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2072                        timm:$cc),
2073                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2074                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2075                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2076
2077  let Uses = [MXCSR] in
2078  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2079                     (outs _.KRC:$dst),
2080                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2081                     "vcmp"#_.Suffix,
2082                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2083                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2084                                timm:$cc),
2085                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2086                                   timm:$cc)>,
2087                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2088
2089  let isCodeGenOnly = 1 in {
2090    let isCommutable = 1 in
2091    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2092                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2093                !strconcat("vcmp", _.Suffix,
2094                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2095                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2096                                          _.FRC:$src2,
2097                                          timm:$cc))]>,
2098                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2099    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2100              (outs _.KRC:$dst),
2101              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2102              !strconcat("vcmp", _.Suffix,
2103                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2104              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2105                                        (_.ScalarLdFrag addr:$src2),
2106                                        timm:$cc))]>,
2107              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2108              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2109  }
2110}
2111
// Single-use variants of the scalar compare nodes: each fragment matches the
// underlying node (X86cmpms / X86cmpmsSAE) only when the matched node has
// exactly one use (N->hasOneUse()).
2112def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2113                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
2114  return N->hasOneUse();
2115}]>;
2116def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2117                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2118  return N->hasOneUse();
2119}]>;
2120
// Scalar compare instantiations, both gated on HasAVX512 and sharing the same
// nodes and scheduling class.  The single-precision form uses f32x_info in the
// SSEPackedSingle domain with AVX512XSIi8Base; the double-precision form uses
// f64x_info in the SSEPackedDouble domain with AVX512XDIi8Base and VEX_W.
2121let Predicates = [HasAVX512] in {
2122  let ExeDomain = SSEPackedSingle in
2123  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2124                                   X86cmpms_su, X86cmpmsSAE_su,
2125                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2126  let ExeDomain = SSEPackedDouble in
2127  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2128                                   X86cmpms_su, X86cmpmsSAE_su,
2129                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2130}
2131
// Packed integer compare writing a mask register (_.KRC:$dst).  Defines four
// records, all with empty selection patterns and hasSideEffects = 0:
//   rr  / rrk - register source, unmasked / masked by _.KRCWM:$mask
//   rm  / rmk - full-vector memory source (mayLoad = 1), unmasked / masked
// isCommutable is taken from the IsCommutable parameter and applied only to
// the register forms (rr, rrk).
2132multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2133                              X86FoldableSchedWrite sched,
2134                              X86VectorVTInfo _, bit IsCommutable> {
2135  let isCommutable = IsCommutable, hasSideEffects = 0 in
2136  def rr : AVX512BI<opc, MRMSrcReg,
2137             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2138             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2139             []>, EVEX_4V, Sched<[sched]>;
2140  let mayLoad = 1, hasSideEffects = 0 in
2141  def rm : AVX512BI<opc, MRMSrcMem,
2142             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2143             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2144             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2145  let isCommutable = IsCommutable, hasSideEffects = 0 in
2146  def rrk : AVX512BI<opc, MRMSrcReg,
2147              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2148              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2149                          "$dst {${mask}}, $src1, $src2}"),
2150              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2151  let mayLoad = 1, hasSideEffects = 0 in
2152  def rmk : AVX512BI<opc, MRMSrcMem,
2153              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2154              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2155                          "$dst {${mask}}, $src1, $src2}"),
2156              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2157}
2158
// Inherits rr / rm / rrk / rmk from avx512_icmp_packed and adds the
// broadcast-memory forms rmb (unmasked) and rmbk (masked).  In both, $src2 is
// a scalar memory operand, the assembly strings append _.BroadcastStr to it,
// and EVEX_B is set.  Patterns are empty; mayLoad = 1 and hasSideEffects = 0
// are set explicitly.
2159multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2160                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2161                                  bit IsCommutable> :
2162           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2163  let mayLoad = 1, hasSideEffects = 0 in {
2164  def rmb : AVX512BI<opc, MRMSrcMem,
2165              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2166              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2167                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2168              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2169  def rmbk : AVX512BI<opc, MRMSrcMem,
2170               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2171                                       _.ScalarMemOp:$src2),
2172               !strconcat(OpcodeStr,
2173                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2174                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2175               []>, EVEX_4V, EVEX_K, EVEX_B,
2176               Sched<[sched.Folded, sched.ReadAfterFold]>;
2177  }
2178}
2179
// Expands avx512_icmp_packed (no broadcast forms) over three widths, picking
// sched.ZMM / YMM / XMM to match.  The 512-bit variant requires `prd`; the
// 256-bit and 128-bit variants require `prd` and HasVLX.  IsCommutable
// defaults to 0 and is forwarded unchanged.
2180multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2181                                 X86SchedWriteWidths sched,
2182                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2183                                 bit IsCommutable = 0> {
2184  let Predicates = [prd] in
2185  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2186                              VTInfo.info512, IsCommutable>, EVEX_V512;
2187
2188  let Predicates = [prd, HasVLX] in {
2189    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2190                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2191    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2192                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2193  }
2194}
2195
// Same width expansion as avx512_icmp_packed_vl, but based on
// avx512_icmp_packed_rmb so each width also gets the broadcast-memory forms
// (rmb, rmbk).  Predicate handling and the IsCommutable default (0) are
// identical.
2196multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2197                                     X86SchedWriteWidths sched,
2198                                     AVX512VLVectorVTInfo VTInfo,
2199                                     Predicate prd, bit IsCommutable = 0> {
2200  let Predicates = [prd] in
2201  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2202                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2203
2204  let Predicates = [prd, HasVLX] in {
2205    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2206                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2207    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2208                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2209  }
2210}
2211
2212// X86setcc_commute is ISD::SETCC re-declared with SDNPCommutative, so that a
2213// load can be matched in either operand (intended for PCMPEQ).
// X86pcmpgtm is a fragment matching the ordinary (non-commutative) setcc with
// the SETGT condition code.
2214def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2215def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2216                         (setcc node:$src1, node:$src2, SETGT)>;
2217
2218// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2219// increase the pattern complexity the way an immediate would.
//
// Instantiations below:
//   - B / W element sizes use avx512_icmp_packed_vl (no broadcast forms) and
//     are gated on HasBWI; they are tagged VEX_WIG.
//   - D / Q element sizes use avx512_icmp_packed_rmb_vl (with broadcast forms)
//     and are gated on HasAVX512; the Q variants add T8PD and VEX_W.
//   - The EQ variants pass IsCommutable = 1; the GT variants leave it at the
//     default of 0.
2220let AddedComplexity = 2 in {
2221// FIXME: Is there a better scheduler class for VPCMP?
2222defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2223                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2224                EVEX_CD8<8, CD8VF>, VEX_WIG;
2225
2226defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2227                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2228                EVEX_CD8<16, CD8VF>, VEX_WIG;
2229
2230defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2231                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2232                EVEX_CD8<32, CD8VF>;
2233
2234defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2235                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2236                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2237
2238defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2239                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2240                EVEX_CD8<8, CD8VF>, VEX_WIG;
2241
2242defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2243                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2244                EVEX_CD8<16, CD8VF>, VEX_WIG;
2245
2246defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2247                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2248                EVEX_CD8<32, CD8VF>;
2249
2250defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2251                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2252                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2253}
2254
// Integer compare-into-mask with an immediate condition code ("vpcmp" #
// Suffix).  Defines four instructions that write a mask register:
//   rri  - register / register
//   rmi  - register / full-vector memory operand (_.LdFrag)
//   rrik - register / register, result ANDed with the write-mask $mask
//   rmik - register / memory,   result ANDed with the write-mask $mask
// followed by two anonymous patterns that select rmi/rmik when the load is
// the *first* compare operand: the operands are swapped and the immediate is
// produced by the commuting fragment's OperandTransform.
//
// Parameters:
//   opc                  - opcode byte.
//   Suffix               - mnemonic suffix appended to "vpcmp".
//   Frag / Frag_su       - fragments matched by the unmasked / masked forms.
//   CommFrag/CommFrag_su - fragments matched by the unmasked / masked
//                          load-commuting patterns.
//   sched                - scheduling class (.Folded/.ReadAfterFold are used
//                          for the memory forms).
//   _                    - vector type info (register classes, VT, LdFrag...).
//   Name                 - defm base name; combined with _.ZSuffix to look up
//                          the generated instruction records via !cast.
2255multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2256                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2257                          X86FoldableSchedWrite sched,
2258                          X86VectorVTInfo _, string Name> {
2259  let isCommutable = 1 in
2260  def rri : AVX512AIi8<opc, MRMSrcReg,
2261             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2262             !strconcat("vpcmp", Suffix,
2263                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2264             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2265                                                (_.VT _.RC:$src2),
2266                                                cond)))]>,
2267             EVEX_4V, Sched<[sched]>;
2268  def rmi : AVX512AIi8<opc, MRMSrcMem,
2269             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2270             !strconcat("vpcmp", Suffix,
2271                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2272             [(set _.KRC:$dst, (_.KVT
2273                                (Frag:$cc
2274                                 (_.VT _.RC:$src1),
2275                                 (_.VT (_.LdFrag addr:$src2)),
2276                                 cond)))]>,
2277             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2278  let isCommutable = 1 in
2279  def rrik : AVX512AIi8<opc, MRMSrcReg,
2280              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2281                                      u8imm:$cc),
2282              !strconcat("vpcmp", Suffix,
2283                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2284                         "$dst {${mask}}, $src1, $src2, $cc}"),
2285              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2286                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2287                                                         (_.VT _.RC:$src2),
2288                                                         cond))))]>,
2289              EVEX_4V, EVEX_K, Sched<[sched]>;
2290  def rmik : AVX512AIi8<opc, MRMSrcMem,
2291              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2292                                    u8imm:$cc),
2293              !strconcat("vpcmp", Suffix,
2294                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2295                         "$dst {${mask}}, $src1, $src2, $cc}"),
2296              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2297                                     (_.KVT
2298                                      (Frag_su:$cc
2299                                       (_.VT _.RC:$src1),
2300                                       (_.VT (_.LdFrag addr:$src2)),
2301                                       cond))))]>,
2302              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2303
  // Load in the first operand, unmasked: swap the operands and commute the
  // immediate through CommFrag's transform.
2304  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2305                                 (_.VT _.RC:$src1), cond)),
2306            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2307             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2308
  // Load in the first operand, masked.  The transform is taken from the
  // fragment that is actually matched (CommFrag_su), mirroring the rmibk
  // pattern in avx512_icmp_cc_rmb.
2309  def : Pat<(and _.KRCWM:$mask,
2310                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2311                                      (_.VT _.RC:$src1), cond))),
2312            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2313             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2314             (CommFrag_su.OperandTransform $cc))>;
2315}
2316
// Extends avx512_icmp_cc (all of its instructions and patterns are inherited)
// with the broadcast-memory forms:
//   rmib  - register / broadcast scalar memory operand (EVEX_B)
//   rmibk - same, result ANDed with the write-mask $mask (EVEX_K, EVEX_B)
// and with the two matching patterns that select them when the broadcast load
// is the first compare operand (operands swapped, immediate commuted).
// Parameters have the same meaning as in avx512_icmp_cc.
2317multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2318                              PatFrag Frag_su, PatFrag CommFrag,
2319                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2320                              X86VectorVTInfo _, string Name> :
2321           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2322                          sched, _, Name> {
2323  def rmib : AVX512AIi8<opc, MRMSrcMem,
2324             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2325                                     u8imm:$cc),
2326             !strconcat("vpcmp", Suffix,
2327                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2328                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2329             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2330                                       (_.VT _.RC:$src1),
2331                                       (_.BroadcastLdFrag addr:$src2),
2332                                       cond)))]>,
2333             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2334  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2335              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2336                                       _.ScalarMemOp:$src2, u8imm:$cc),
2337              !strconcat("vpcmp", Suffix,
2338                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2339                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2340              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2341                                     (_.KVT (Frag_su:$cc
2342                                             (_.VT _.RC:$src1),
2343                                             (_.BroadcastLdFrag addr:$src2),
2344                                             cond))))]>,
2345              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2346
  // Broadcast load in the first operand, unmasked.
2347  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
2348                    (_.VT _.RC:$src1), cond)),
2349            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2350             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2351
  // Broadcast load in the first operand, masked.
2352  def : Pat<(and _.KRCWM:$mask,
2353                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
2354                                      (_.VT _.RC:$src1), cond))),
2355            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2356             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2357             (CommFrag_su.OperandTransform $cc))>;
2358}
2359
// Instantiates avx512_icmp_cc for all three vector lengths of VTInfo:
//   Z    - 512-bit (info512, sched.ZMM), requires prd.
//   Z256 - 256-bit (info256, sched.YMM), requires prd and HasVLX.
//   Z128 - 128-bit (info128, sched.XMM), requires prd and HasVLX.
// NAME is forwarded so the inner patterns can !cast the generated records.
2360multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2361                             PatFrag Frag_su, PatFrag CommFrag,
2362                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
2363                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2364  let Predicates = [prd] in
2365  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2366                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2367
2368  let Predicates = [prd, HasVLX] in {
2369    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2370                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2371    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2372                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2373  }
2374}
2375
// Same layout as avx512_icmp_cc_vl, but instantiates avx512_icmp_cc_rmb so
// every vector length also gets the broadcast-memory forms:
//   Z    - 512-bit, requires prd.
//   Z256 - 256-bit, requires prd and HasVLX.
//   Z128 - 128-bit, requires prd and HasVLX.
2376multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2377                                 PatFrag Frag_su, PatFrag CommFrag,
2378                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2379                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2380  let Predicates = [prd] in
2381  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2382                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2383
2384  let Predicates = [prd, HasVLX] in {
2385    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2386                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2387    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2388                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2389  }
2390}
2391
// Node transform applied to a setcc: reads the condition code from operand 2,
// maps it with X86::getVPCMPImmForCond and returns the result as an i8
// immediate.
2392def X86pcmpm_imm : SDNodeXForm<setcc, [{
2393  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2394  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2395  return getI8Imm(SSECC, SDLoc(N));
2396}]>;
2397
2398// Swapped operand version of X86pcmpm_imm: the immediate obtained from
// X86::getVPCMPImmForCond is additionally passed through
// X86::getSwappedVPCMPImm before being returned as an i8 immediate.
2399def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2400  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2401  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2402  SSECC = X86::getSwappedVPCMPImm(SSECC);
2403  return getI8Imm(SSECC, SDLoc(N));
2404}]>;
2405
// Matches a setcc whose condition code is NOT an unsigned integer comparison
// (per ISD::isUnsignedIntSetCC).  Operand transform: X86pcmpm_imm.
2406def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2407                       (setcc node:$src1, node:$src2, node:$cc), [{
2408  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2409  return !ISD::isUnsignedIntSetCC(CC);
2410}], X86pcmpm_imm>;
2411
// Single-use variant of X86pcmpm: additionally requires N->hasOneUse().
// Operand transform: X86pcmpm_imm.
2412def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2413                          (setcc node:$src1, node:$src2, node:$cc), [{
2414  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2415  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2416}], X86pcmpm_imm>;
2417
2418// Same predicate as X86pcmpm, but the operand transform is
// X86pcmpm_imm_commute, i.e. the immediate is commuted. Use for load folding.
2419def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2420                               (setcc node:$src1, node:$src2, node:$cc), [{
2421  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2422  return !ISD::isUnsignedIntSetCC(CC);
2423}], X86pcmpm_imm_commute>;
2424
// Single-use variant of X86pcmpm_commute: additionally requires
// N->hasOneUse().  Operand transform: X86pcmpm_imm_commute.
2425def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2426                                  (setcc node:$src1, node:$src2, node:$cc), [{
2427  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2428  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2429}], X86pcmpm_imm_commute>;
2430
// Matches a setcc whose condition code IS an unsigned integer comparison
// (per ISD::isUnsignedIntSetCC).  Operand transform: X86pcmpm_imm.
2431def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2432                        (setcc node:$src1, node:$src2, node:$cc), [{
2433  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2434  return ISD::isUnsignedIntSetCC(CC);
2435}], X86pcmpm_imm>;
2436
// Single-use variant of X86pcmpum: additionally requires N->hasOneUse().
// Operand transform: X86pcmpm_imm.
2437def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2438                           (setcc node:$src1, node:$src2, node:$cc), [{
2439  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2440  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2441}], X86pcmpm_imm>;
2442
2443// Same predicate as X86pcmpum, but the operand transform is
// X86pcmpm_imm_commute, i.e. the immediate is commuted. Use for load folding.
2444def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2445                                (setcc node:$src1, node:$src2, node:$cc), [{
2446  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2447  return ISD::isUnsignedIntSetCC(CC);
2448}], X86pcmpm_imm_commute>;
2449
// Single-use variant of X86pcmpum_commute: additionally requires
// N->hasOneUse().  Operand transform: X86pcmpm_imm_commute.
2450def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2451                                   (setcc node:$src1, node:$src2, node:$cc), [{
2452  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2453  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2454}], X86pcmpm_imm_commute>;
2455
// Immediate-condition-code integer compares.  VPCMP{B,W,D,Q} are built from
// the X86pcmpm* fragments (non-unsigned condition codes) and VPCMPU{B,W,D,Q}
// from the X86pcmpum* fragments (unsigned condition codes).
//   byte/word   : opcodes 0x3F / 0x3E, HasBWI, no broadcast forms
//                 (avx512_icmp_cc_vl).
//   dword/qword : opcodes 0x1F / 0x1E, HasAVX512, with broadcast forms
//                 (avx512_icmp_cc_rmb_vl).
// The word and qword variants additionally carry VEX_W.
2456// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2457defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2458                                X86pcmpm_commute, X86pcmpm_commute_su,
2459                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2460                                EVEX_CD8<8, CD8VF>;
2461defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2462                                 X86pcmpum_commute, X86pcmpum_commute_su,
2463                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2464                                 EVEX_CD8<8, CD8VF>;
2465
2466defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2467                                X86pcmpm_commute, X86pcmpm_commute_su,
2468                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2469                                VEX_W, EVEX_CD8<16, CD8VF>;
2470defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2471                                 X86pcmpum_commute, X86pcmpum_commute_su,
2472                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2473                                 VEX_W, EVEX_CD8<16, CD8VF>;
2474
2475defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2476                                    X86pcmpm_commute, X86pcmpm_commute_su,
2477                                    SchedWriteVecALU, avx512vl_i32_info,
2478                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2479defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2480                                     X86pcmpum_commute, X86pcmpum_commute_su,
2481                                     SchedWriteVecALU, avx512vl_i32_info,
2482                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2483
2484defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2485                                    X86pcmpm_commute, X86pcmpm_commute_su,
2486                                    SchedWriteVecALU, avx512vl_i64_info,
2487                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2488defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2489                                     X86pcmpum_commute, X86pcmpum_commute_su,
2490                                     SchedWriteVecALU, avx512vl_i64_info,
2491                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2492
// X86cmpm restricted to nodes with a single use; used by the masked FP
// compare patterns below.
2493def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2494                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
2495  return N->hasOneUse();
2496}]>;
// X86cmpmSAE restricted to nodes with a single use; used by the masked {sae}
// compare form.
2497def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2498                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2499  return N->hasOneUse();
2500}]>;
2501
// Node transform for a target immediate: keeps the low five bits (0x1f) of the
// value, maps them through X86::getSwappedVCMPImm and returns the result as an
// i8 immediate.  Used when the compare operands are swapped to fold a load.
2502def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2503  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2504  return getI8Imm(Imm, SDLoc(N));
2505}]>;
2506
// Packed floating-point compare into a mask register ("vcmp" # _.Suffix,
// opcode 0xC2), built on AVX512_maskable_cmp:
//   rri  - register / register (the trailing `1` argument is passed only here)
//   rmi  - register / full-vector memory operand
//   rmbi - register / broadcast scalar memory operand (EVEX_B)
// All three are declared as using MXCSR and as possibly raising an FP
// exception.  The unmasked pattern uses X86any_cmpm, the masked one uses the
// single-use X86cmpm_su.
// The trailing anonymous patterns select the memory forms when the load is
// the first compare operand, swapping the operands and commuting the
// immediate with X86cmpm_imm_commute.
// NOTE(review): the "rmik"/"rmbik" records referenced below are presumably
// the masked variants generated by AVX512_maskable_cmp, which is defined
// outside this excerpt -- confirm there.
//   sched - scheduling class;  _ - vector type info;  Name - defm base name
//   used with _.ZSuffix to !cast the generated instructions.
2507multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2508                              string Name> {
2509let Uses = [MXCSR], mayRaiseFPException = 1 in {
2510  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2511                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2512                   "vcmp"#_.Suffix,
2513                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2514                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2515                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2516                   1>, Sched<[sched]>;
2517
2518  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2519                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2520                "vcmp"#_.Suffix,
2521                "$cc, $src2, $src1", "$src1, $src2, $cc",
2522                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2523                             timm:$cc),
2524                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2525                            timm:$cc)>,
2526                Sched<[sched.Folded, sched.ReadAfterFold]>;
2527
2528  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2529                (outs _.KRC:$dst),
2530                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2531                "vcmp"#_.Suffix,
2532                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2533                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2534                (X86any_cmpm (_.VT _.RC:$src1),
2535                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2536                             timm:$cc),
2537                (X86cmpm_su (_.VT _.RC:$src1),
2538                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2539                            timm:$cc)>,
2540                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2541  }
2542
2543  // Patterns for selecting with loads in other operand.
2544  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2545                         timm:$cc),
2546            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2547                                                      (X86cmpm_imm_commute timm:$cc))>;
2548
2549  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2550                                            (_.VT _.RC:$src1),
2551                                            timm:$cc)),
2552            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2553                                                       _.RC:$src1, addr:$src2,
2554                                                       (X86cmpm_imm_commute timm:$cc))>;
2555
2556  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2557                         (_.VT _.RC:$src1), timm:$cc),
2558            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2559                                                       (X86cmpm_imm_commute timm:$cc))>;
2560
2561  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2562                                            (_.VT _.RC:$src1),
2563                                            timm:$cc)),
2564            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2565                                                        _.RC:$src1, addr:$src2,
2566                                                        (X86cmpm_imm_commute timm:$cc))>;
2567}
2568
// Register/register packed FP compare with the {sae} operand in its assembly
// string (rrib, EVEX_B).  Matches X86cmpmSAE (X86cmpmSAE_su for the masked
// form).  Unlike avx512_vcmp_common, only `Uses = [MXCSR]` is set here;
// mayRaiseFPException is not.
2569multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2570  // comparison code form (VCMP[EQ/LT/LE/...])
2571  let Uses = [MXCSR] in
2572  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2573                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2574                     "vcmp"#_.Suffix,
2575                     "$cc, {sae}, $src2, $src1",
2576                     "$src1, $src2, {sae}, $cc",
2577                     (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2578                     (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2579                                    timm:$cc)>,
2580                     EVEX_B, Sched<[sched]>;
2581}
2582
// Instantiates the packed FP compares for all vector lengths:
//   Z    - 512-bit: common forms plus the {sae} form, requires HasAVX512.
//   Z128 - 128-bit: common forms only, requires HasAVX512 and HasVLX.
//   Z256 - 256-bit: common forms only, requires HasAVX512 and HasVLX.
2583multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2584  let Predicates = [HasAVX512] in {
2585    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2586                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2587
2588  }
2589  let Predicates = [HasAVX512,HasVLX] in {
2590   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2591   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2592  }
2593}
2594
// Packed double (f64, VEX_W) and packed single (f32) compare-into-mask
// instructions, both scheduled with SchedWriteFCmp.
2595defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2596                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2597defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2598                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2599
2600// Patterns to select fp compares with load as first operand.
// The scalar load is folded into VCMPSDZrm / VCMPSSZrm by swapping the
// operands and commuting the immediate with X86cmpm_imm_commute.
2601let Predicates = [HasAVX512] in {
2602  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2603                            timm:$cc)),
2604            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2605
2606  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2607                            timm:$cc)),
2608            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2609}
2610
2611// ----------------------------------------------------------------
2612// FPClass
2613
// X86Vfpclasss restricted to nodes with a single use; used by the masked
// scalar fpclass patterns.
2614def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2615                              (X86Vfpclasss node:$src1, node:$src2), [{
2616  return N->hasOneUse();
2617}]>;
2618
// X86Vfpclass restricted to nodes with a single use; used by the masked
// vector fpclass patterns.
2619def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2620                             (X86Vfpclass node:$src1, node:$src2), [{
2621  return N->hasOneUse();
2622}]>;
2623
// Scalar fpclass: classifies a scalar source against the immediate $src2 and
// writes the result to a mask register.  Defines
//   rr  - register source
//   rrk - register source, result ANDed with the write-mask $mask (EVEX_K)
//   rm  - memory source (_.ScalarIntMemFrags)
//   rmk - memory source, result ANDed with the write-mask $mask (EVEX_K)
// All four are guarded by `prd` and declared as using MXCSR.
2624//handle fpclass instruction  mask =  op(reg_scalar,imm)
2625//                                    op(mem_scalar,imm)
2626multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2627                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2628                                 Predicate prd> {
2629  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2630      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2631                      (ins _.RC:$src1, i32u8imm:$src2),
2632                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2633                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2634                              (i32 timm:$src2)))]>,
2635                      Sched<[sched]>;
2636      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2637                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2638                      OpcodeStr#_.Suffix#
2639                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2640                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2641                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2642                                      (i32 timm:$src2))))]>,
2643                      EVEX_K, Sched<[sched]>;
2644    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2645                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2646                    OpcodeStr#_.Suffix#
2647                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2648                    [(set _.KRC:$dst,
2649                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2650                                        (i32 timm:$src2)))]>,
2651                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2652    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2653                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2654                    OpcodeStr#_.Suffix#
2655                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2656                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2657                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2658                            (i32 timm:$src2))))]>,
2659                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2660  }
2661}
2662
// Vector fpclass: classifies each element of a single vector source against
// the immediate $src2 and writes the result to a mask register.  Defines
// rr/rrk (register), rm/rmk (full-vector memory) and rmb/rmbk (broadcast
// scalar memory, EVEX_B); the *k forms AND the result with the write-mask.
// All are declared as using MXCSR.
//   mem - size suffix ("x", "y" or "z" at the instantiation sites).  It is
//         spliced into the rm/rmk mnemonic as "{mem}" and used, without
//         braces, in the AT&T-only aliases at the end of the multiclass.
2663//handle fpclass instruction mask = fpclass(reg_vec, imm)
2664//                                  fpclass(mem_vec, imm)
2665//                                  fpclass(broadcast(eltVt), imm)
2666multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2667                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2668                                 string mem>{
2669  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2670  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2671                      (ins _.RC:$src1, i32u8imm:$src2),
2672                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2673                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2674                                       (i32 timm:$src2)))]>,
2675                      Sched<[sched]>;
2676  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2677                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2678                      OpcodeStr#_.Suffix#
2679                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2680                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2681                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2682                                       (i32 timm:$src2))))]>,
2683                      EVEX_K, Sched<[sched]>;
2684  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2685                    (ins _.MemOp:$src1, i32u8imm:$src2),
2686                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2687                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2688                    [(set _.KRC:$dst,(X86Vfpclass
2689                                     (_.VT (_.LdFrag addr:$src1)),
2690                                     (i32 timm:$src2)))]>,
2691                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2692  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2693                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2694                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2695                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2696                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2697                                  (_.VT (_.LdFrag addr:$src1)),
2698                                  (i32 timm:$src2))))]>,
2699                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2700  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2701                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2702                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2703                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2704                                                  #_.BroadcastStr#", $src2}",
2705                    [(set _.KRC:$dst,(X86Vfpclass
2706                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2707                                     (i32 timm:$src2)))]>,
2708                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2709  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2710                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2711                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2712                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2713                                                   _.BroadcastStr#", $src2}",
2714                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2715                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2716                                     (i32 timm:$src2))))]>,
2717                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2718  }
2719
2720  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2721  // the memory form.
2722  def : InstAlias<OpcodeStr#_.Suffix#mem#
2723                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2724                  (!cast<Instruction>(NAME#"rr")
2725                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2726  def : InstAlias<OpcodeStr#_.Suffix#mem#
2727                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2728                  (!cast<Instruction>(NAME#"rrk")
2729                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2730  def : InstAlias<OpcodeStr#_.Suffix#mem#
2731                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2732                  _.BroadcastStr#", $src2}",
2733                  (!cast<Instruction>(NAME#"rmb")
2734                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2735  def : InstAlias<OpcodeStr#_.Suffix#mem#
2736                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2737                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2738                  (!cast<Instruction>(NAME#"rmbk")
2739                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2740}
2741
// Instantiates avx512_vector_fpclass for all three vector lengths, passing
// the size suffix that disambiguates the memory forms:
//   Z    - 512-bit, suffix "z", requires prd.
//   Z128 - 128-bit, suffix "x", requires prd and HasVLX.
//   Z256 - 256-bit, suffix "y", requires prd and HasVLX.
2742multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2743                                     bits<8> opc, X86SchedWriteWidths sched,
2744                                     Predicate prd>{
2745  let Predicates = [prd] in {
2746    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2747                                      _.info512, "z">, EVEX_V512;
2748  }
2749  let Predicates = [prd, HasVLX] in {
2750    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2751                                      _.info128, "x">, EVEX_V128;
2752    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2753                                      _.info256, "y">, EVEX_V256;
2754  }
2755}
2756
// Top-level fpclass family: packed single (PS) and packed double (PD, VEX_W)
// use opcVec across all vector lengths; scalar single (SSZ) and scalar double
// (SDZ, VEX_W) use opcScalar with sched.Scl and are VEX_LIG.
2757multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2758                                 bits<8> opcScalar, X86SchedWriteWidths sched,
2759                                 Predicate prd> {
2760  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2761                                      sched, prd>,
2762                                      EVEX_CD8<32, CD8VF>;
2763  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2764                                      sched, prd>,
2765                                      EVEX_CD8<64, CD8VF> , VEX_W;
2766  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2767                                   sched.Scl, f32x_info, prd>, VEX_LIG,
2768                                   EVEX_CD8<32, CD8VT1>;
2769  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2770                                   sched.Scl, f64x_info, prd>, VEX_LIG,
2771                                   EVEX_CD8<64, CD8VT1>, VEX_W;
2772}
2773
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, gated on
// HasDQI and scheduled with SchedWriteFCmp.
2774defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2775                                      HasDQI>, AVX512AIi8Base, EVEX;
2776
2777//-----------------------------------------------------------------
2778// Mask register copy, including
2779// - copy between mask registers
2780// - load/store mask registers
2781// - copy from GPR to mask register and vice versa
2782//
// Mask-register moves that do not involve a GPR:
//   kk - mask register to mask register (no selection pattern; marked
//        isMoveReg with hasSideEffects = 0)
//   km - load a mask register from memory (selected for a `load` of type vvt)
//   mk - store a mask register to memory
// opc_kk / opc_km / opc_mk are the respective opcode bytes; KRC is the mask
// register class and x86memop the memory operand type.
2783multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2784                         string OpcodeStr, RegisterClass KRC,
2785                         ValueType vvt, X86MemOperand x86memop> {
2786  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2787  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2788             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2789             Sched<[WriteMove]>;
2790  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2791             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2792             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2793             Sched<[WriteLoad]>;
2794  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2795             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2796             [(store KRC:$src, addr:$dst)]>,
2797             Sched<[WriteStore]>;
2798}
2799
// Mask-register moves to and from a general-purpose register:
//   kr - GPR (GRC) to mask register (KRC)
//   rk - mask register (KRC) to GPR (GRC)
// Neither has a selection pattern; both are marked hasSideEffects = 0.
2800multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2801                             string OpcodeStr,
2802                             RegisterClass KRC, RegisterClass GRC> {
2803  let hasSideEffects = 0 in {
2804    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2805               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2806               Sched<[WriteMove]>;
2807    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2808               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2809               Sched<[WriteMove]>;
2810  }
2811}
2812
// KMOV instantiations per mask width:
//   KMOVB (VK8,  v8i1)  - HasDQI;    GPR forms use GR32.
//   KMOVW (VK16, v16i1) - HasAVX512; GPR forms use GR32.
//   KMOVD (VK32, v32i1) - HasBWI;    GPR forms use GR32.
//   KMOVQ (VK64, v64i1) - HasBWI;    GPR forms use GR64.
// For KMOVD/KMOVQ the mask/memory forms and the GPR forms are separate defms
// because they carry different prefix/VEX_W modifiers.
2813let Predicates = [HasDQI] in
2814  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2815               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2816               VEX, PD;
2817
2818let Predicates = [HasAVX512] in
2819  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2820               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2821               VEX, PS;
2822
2823let Predicates = [HasBWI] in {
2824  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2825               VEX, PD, VEX_W;
2826  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2827               VEX, XD;
2828  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2829               VEX, PS, VEX_W;
2830  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2831               VEX, XD, VEX_W;
2832}
2833
2834// GR from/to mask register
// i16 <-> v16i1: the 16-bit value is placed in / extracted from a 32-bit
// register (sub_16bit) and moved with COPY_TO_REGCLASS.
2835def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2836          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2837def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2838          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2839
// i8 <-> v8i1: same scheme through sub_8bit.
2840def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2841          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2842def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2843          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2844
// Extensions of a v16i1 mask viewed as i16: zext is selected to KMOVWrk,
// anyext to a plain register-class copy.
2845def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2846          (KMOVWrk VK16:$src)>;
2847def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2848          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2849def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2850          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2851def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2852          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2853
// Extensions of a v8i1 mask viewed as i8: the zext patterns use KMOVBrk and
// therefore require HasDQI; the anyext patterns are plain copies and carry no
// predicate.
2854def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2855          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2856def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2857          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2858def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2859          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2860def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2861          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2862
// i32 <-> v32i1 and i64 <-> v64i1: direct register-class copies.
2863def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2864          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2865def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2866          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2867def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2868          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2869def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2870          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2871
2872// Load/store kreg
// With DQI: a VK1 store is emitted as KMOVBmk after copying the source to
// VK8; v1i1/v2i1/v4i1 loads are emitted as KMOVBkm with the result copied to
// the narrower mask class.
2873let Predicates = [HasDQI] in {
2874  def : Pat<(store VK1:$src, addr:$dst),
2875            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2876
2877  def : Pat<(v1i1 (load addr:$src)),
2878            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2879  def : Pat<(v2i1 (load addr:$src)),
2880            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2881  def : Pat<(v4i1 (load addr:$src)),
2882            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2883}
2884
// With AVX512: a v8i1 bitcast of an i8 load is selected as MOVZX32rm8 followed
// by a copy to VK8; a v16i1 bitcast of an i16 load is selected as KMOVWkm.
2885let Predicates = [HasAVX512] in {
2886  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2887            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2888  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2889            (KMOVWkm addr:$src)>;
2890}
2891
// ISD::EXTRACT_VECTOR_ELT with a restricted type profile: one result of type
// i8, operand 1 a vector whose element type is i1, operand 2 of pointer type.
2892def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2893                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2894                                              SDTCVecEltisVT<1, i1>,
2895                                              SDTCisPtrTy<2>]>>;
2896
// Scalar <-> mask element-0 lowering, available with HasAVX512.
2897let Predicates = [HasAVX512] in {
  // For one mask class/type pair, emits four patterns:
  //   1. scalar_to_vector from GR32: register-class copy to maskRC.
  //   2. scalar_to_vector from GR8: insert into an undefined i32 (sub_8bit),
  //      then copy to maskRC.
  //   3. X86kextract of element 0 as i8: copy maskRC to GR32, take sub_8bit.
  //   4. anyext to i32 of (3): just the copy to GR32.
2898  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2899    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2900              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2901
2902    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2903              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2904
2905    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2906              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2907
2908    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2909              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2910  }
2911
  // Instantiated for every mask width from v1i1 to v64i1.
2912  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2913  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2914  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2915  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2916  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2917  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2918  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2919
  // Inserting a v1i1 built from GR8 into an all-zeros v16i1 at index 0: the
  // GR8 is widened to 32 bits, ANDed with 1 so only bit 0 can be set, moved
  // to a mask register with KMOVWkr, and the result is placed in VK16.
2920  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2921                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2922            (COPY_TO_REGCLASS
2923             (KMOVWkr (AND32ri8
2924                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2925                       (i32 1))), VK16)>;
2926}
2927
2928// Mask unary operation
2929// - KNOT
// avx512_mask_unop defines a single `rr` instruction taking one KRC source
// and producing a KRC result, selected for (OpNode KRC:$src). The def is
// guarded by predicate `prd` and scheduled with `sched`.
2930multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2931                            RegisterClass KRC, SDPatternOperator OpNode,
2932                            X86FoldableSchedWrite sched, Predicate prd> {
2933  let Predicates = [prd] in
2934    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2935               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2936               [(set KRC:$dst, (OpNode KRC:$src))]>,
2937               Sched<[sched]>;
2938}
2939
// Instantiates the unary op for all four mask widths, appending the b/w/d/q
// suffix to the mnemonic:
//   B : VK8,  HasDQI,    VEX PD
//   W : VK16, HasAVX512, VEX PS
//   D : VK32, HasBWI,    VEX PD VEX_W
//   Q : VK64, HasBWI,    VEX PS VEX_W
2940multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2941                                SDPatternOperator OpNode,
2942                                X86FoldableSchedWrite sched> {
2943  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2944                            sched, HasDQI>, VEX, PD;
2945  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2946                            sched, HasAVX512>, VEX, PS;
2947  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2948                            sched, HasBWI>, VEX, PD, VEX_W;
2949  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2950                            sched, HasBWI>, VEX, PS, VEX_W;
2951}
2952
2953// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KNOT{B,W,D,Q}: opcode 0x44, selected for `vnot`.
2954defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2955
2956// Without DQI (e.g. KNL) the KNOTB form is not available, so an 8-bit mask is
// promoted to 16-bit: copy to VK16, invert with KNOTW, copy back to VK8.
2957let Predicates = [HasAVX512, NoDQI] in
2958def : Pat<(vnot VK8:$src),
2959          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2960
// Masks narrower than 8 bits have no KNOT form of their own: they are always
// widened to VK16, inverted with KNOTW, and copied back to their own class.
// The result class of each pattern must match the source class (VK1 -> VK1).
2961def : Pat<(vnot VK4:$src),
2962          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2963def : Pat<(vnot VK2:$src),
2964          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2965def : Pat<(vnot VK1:$src),
2966          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2967
2968// Mask binary operation
2969// - KAND, KANDN, KOR, KXNOR, KXOR
// avx512_mask_binop defines a single `rr` instruction with two KRC sources
// and a KRC result, selected for (OpNode KRC:$src1, KRC:$src2). The def is
// guarded by `prd`, and its isCommutable flag is taken from IsCommutable.
2970multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2971                           RegisterClass KRC, SDPatternOperator OpNode,
2972                           X86FoldableSchedWrite sched, Predicate prd,
2973                           bit IsCommutable> {
2974  let Predicates = [prd], isCommutable = IsCommutable in
2975    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2976               !strconcat(OpcodeStr,
2977                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2978               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2979               Sched<[sched]>;
2980}
2981
// Instantiates the binary op for all four mask widths (b/w/d/q suffixes).
// B requires HasDQI, D and Q require HasBWI; the predicate for the W form is
// the `prdW` parameter, which defaults to HasAVX512.
2982multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2983                                 SDPatternOperator OpNode,
2984                                 X86FoldableSchedWrite sched, bit IsCommutable,
2985                                 Predicate prdW = HasAVX512> {
2986  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2987                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2988  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2989                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2990  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2991                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2992  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2993                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2994}
2995
2996// These nodes use 'vnot' instead of 'not' to support vectors.
//   vandn(i0, i1) = and(vnot(i0), i1)
//   vxnor(i0, i1) = vnot(xor(i0, i1))
2997def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2998def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2999
3000// TODO - do we need a X86SchedWriteWidths::KMASK type?
// Mask logic/arithmetic instructions. The last bit argument is IsCommutable:
// every operation is marked commutable except KANDN. KADD passes HasDQI as
// the predicate for its W form instead of the default HasAVX512.
3001defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3002defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3003defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3004defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3005defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3006defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3007
// Selection patterns that implement a binary mask operation on narrow mask
// types by running the 16-bit instruction `Inst`: both operands are copied to
// VK16, `Inst` is applied, and the result is copied back to the narrow class.
// The VK8 pattern is restricted to NoDQI; the VK1/VK2/VK4 patterns carry no
// predicate of their own.
3008multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3009                            Instruction Inst> {
3010  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3011  // for the DQI set, this type is legal and KxxxB instruction is used
3012  let Predicates = [NoDQI] in
3013  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3014            (COPY_TO_REGCLASS
3015              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3016                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3017
3018  // All types smaller than 8 bits require conversion anyway
3019  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3020        (COPY_TO_REGCLASS (Inst
3021                           (COPY_TO_REGCLASS VK1:$src1, VK16),
3022                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3023  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3024        (COPY_TO_REGCLASS (Inst
3025                           (COPY_TO_REGCLASS VK2:$src1, VK16),
3026                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3027  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3028        (COPY_TO_REGCLASS (Inst
3029                           (COPY_TO_REGCLASS VK4:$src1, VK16),
3030                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3031}
3032
// Narrow-mask patterns for the five logic operations, each mapped to its
// 16-bit (W) instruction.
3033defm : avx512_binop_pat<and,   KANDWrr>;
3034defm : avx512_binop_pat<vandn, KANDNWrr>;
3035defm : avx512_binop_pat<or,    KORWrr>;
3036defm : avx512_binop_pat<vxnor, KXNORWrr>;
3037defm : avx512_binop_pat<xor,   KXORWrr>;
3038
3039// Mask unpacking
// Defines the `rr` form of kunpck<Suffix> (opcode 0x4b): two Src-width mask
// sources produce one Dst-width mask. The instruction itself has no pattern;
// a separate Pat selects it for concat_vectors.
// NOTE(review): the pattern passes $src2 as the first instruction operand and
// $src1 as the second, i.e. the operands are swapped relative to
// concat_vectors. Presumably the instruction places its first source in the
// upper half of the result -- confirm against the ISA reference.
3040multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3041                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3042                             Predicate prd> {
3043  let Predicates = [prd] in {
3044    let hasSideEffects = 0 in
3045    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3046               (ins Src.KRC:$src1, Src.KRC:$src2),
3047               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3048               VEX_4V, VEX_L, Sched<[sched]>;
3049
3050    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3051              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3052  }
3053}
3054
// 8+8 -> 16 (HasAVX512), 16+16 -> 32 (HasBWI), 32+32 -> 64 (HasBWI).
3055defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3056defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3057defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3058
3059// Mask bit testing
// Defines a single `rr` instruction that takes two KRC sources, has no
// register outputs, and is declared to define EFLAGS; it is selected for
// (OpNode KRC:$src1, KRC:$src2) producing EFLAGS.
3060multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3061                              SDNode OpNode, X86FoldableSchedWrite sched,
3062                              Predicate prd> {
3063  let Predicates = [prd], Defs = [EFLAGS] in
3064    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3065               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3066               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3067               Sched<[sched]>;
3068}
3069
// Instantiates the test op for all four mask widths. B requires HasDQI,
// Q and D require HasBWI; the W form uses `prdW` (default HasAVX512).
3070multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3071                                X86FoldableSchedWrite sched,
3072                                Predicate prdW = HasAVX512> {
3073  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3074                                                                VEX, PD;
3075  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3076                                                                VEX, PS;
3077  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3078                                                                VEX, PS, VEX_W;
3079  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3080                                                                VEX, PD, VEX_W;
3081}
3082
3083// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST uses the default W predicate (HasAVX512); KTEST passes HasDQI for
// its W form.
3084defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3085defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3086
3087// Mask shift
// Defines a single `ri` instruction: one KRC source plus an 8-bit unsigned
// immediate, producing a KRC result, selected for
// (OpNode KRC:$src, (i8 timm:$imm)). The def is wrapped in
// `let Predicates = [HasAVX512]`.
3088multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3089                               SDNode OpNode, X86FoldableSchedWrite sched> {
3090  let Predicates = [HasAVX512] in
3091    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3092                 !strconcat(OpcodeStr,
3093                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3094                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3095                 Sched<[sched]>;
3096}
3097
// Instantiates the shift for all four mask widths. W and B share opc1, with W
// additionally carrying VEX_W; Q and D share opc2, with Q additionally
// carrying VEX_W. B is wrapped in HasDQI, Q and D in HasBWI.
// NOTE(review): these outer `let Predicates` wrap defm's whose inner def also
// sets Predicates = [HasAVX512]; which assignment takes effect is not evident
// from this chunk -- confirm against the generated records.
3098multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3099                                 SDNode OpNode, X86FoldableSchedWrite sched> {
3100  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3101                               sched>, VEX, TAPD, VEX_W;
3102  let Predicates = [HasDQI] in
3103  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3104                               sched>, VEX, TAPD;
3105  let Predicates = [HasBWI] in {
3106  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3107                               sched>, VEX, TAPD, VEX_W;
3108  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3109                               sched>, VEX, TAPD;
3110  }
3111}
3112
// Left shifts use opcodes 0x32/0x33, right shifts 0x30/0x31.
3113defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3114defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3115
3116// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Both sources of type Narrow.VT are placed into otherwise-undefined Wide.VT
// registers via INSERT_SUBREG at Narrow.SubRegIdx, the wide "Zrri" compare is
// emitted, and its mask result is copied to Narrow.KRC. The condition code is
// passed through the fragment's OperandTransform.
//   Pattern 1: unmasked compare (Frag)            -> InstStr#"Zrri".
//   Pattern 2: compare ANDed with a mask (Frag_su) -> InstStr#"Zrrik", with
//              the narrow mask first copied to Wide.KRC.
3117multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3118                                                 string InstStr,
3119                                                 X86VectorVTInfo Narrow,
3120                                                 X86VectorVTInfo Wide> {
3121def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3122                                (Narrow.VT Narrow.RC:$src2), cond)),
3123          (COPY_TO_REGCLASS
3124           (!cast<Instruction>(InstStr#"Zrri")
3125            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3126            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3127            (Frag.OperandTransform $cc)), Narrow.KRC)>;
3128
3129def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3130                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3131                                                    (Narrow.VT Narrow.RC:$src2),
3132                                                    cond)))),
3133          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3134           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3135           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3136           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3137           (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3138}
3139
// Integer compares of a narrow vector against a broadcast load, lowered to
// the wide "Zrmib"/"Zrmibk" instruction forms. The register operand is widened
// with INSERT_SUBREG into an undefined Wide.VT value; the memory operand is
// passed as addr:$src2. Four patterns are emitted:
//   - broadcast load as second operand, unmasked (Frag) and masked (Frag_su);
//   - broadcast load as first operand, unmasked (CommFrag) and masked
//     (CommFrag_su). These reuse the same instructions with the register
//     operand still first, and take the condition code from the commuting
//     fragment's OperandTransform.
3140multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3141                                                     PatFrag CommFrag, PatFrag CommFrag_su,
3142                                                     string InstStr,
3143                                                     X86VectorVTInfo Narrow,
3144                                                     X86VectorVTInfo Wide> {
3145// Broadcast load.
3146def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3147                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3148          (COPY_TO_REGCLASS
3149           (!cast<Instruction>(InstStr#"Zrmib")
3150            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3151            addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
3152
3153def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3154                           (Narrow.KVT
3155                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3156                                         (Narrow.BroadcastLdFrag addr:$src2),
3157                                         cond)))),
3158          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3159           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3160           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3161           addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3162
3163// Commuted with broadcast load.
3164def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3165                                    (Narrow.VT Narrow.RC:$src1),
3166                                    cond)),
3167          (COPY_TO_REGCLASS
3168           (!cast<Instruction>(InstStr#"Zrmib")
3169            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3170            addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
3171
3172def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3173                           (Narrow.KVT
3174                            (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3175                                             (Narrow.VT Narrow.RC:$src1),
3176                                             cond)))),
3177          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3178           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3179           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3180           addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
3181}
3182
3183// Same as above, but for fp types which don't use PatFrags.
// Matches X86cmpm / X86cmpm_su directly with a timm condition code and emits
// six patterns onto the wide instruction:
//   - register/register: "Zrri" (unmasked) and "Zrrik" (ANDed with a mask);
//   - register/broadcast-load: "Zrmbi" and "Zrmbik";
//   - broadcast-load/register (commuted): the same "Zrmbi"/"Zrmbik" forms,
//     with the condition code rewritten by X86cmpm_imm_commute.
// Register operands are widened with INSERT_SUBREG into an undefined Wide.VT
// value, narrow masks are copied to Wide.KRC, and the result is copied back
// to Narrow.KRC.
3184multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3185                                                X86VectorVTInfo Narrow,
3186                                                X86VectorVTInfo Wide> {
3187def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3188                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3189          (COPY_TO_REGCLASS
3190           (!cast<Instruction>(InstStr#"Zrri")
3191            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3192            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3193            timm:$cc), Narrow.KRC)>;
3194
3195def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3196                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3197                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3198          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3199           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3200           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3201           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3202           timm:$cc), Narrow.KRC)>;
3203
3204// Broadcast load.
3205def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3206                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3207          (COPY_TO_REGCLASS
3208           (!cast<Instruction>(InstStr#"Zrmbi")
3209            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3210            addr:$src2, timm:$cc), Narrow.KRC)>;
3211
3212def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3213                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3214                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3215          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3216           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3217           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3218           addr:$src2, timm:$cc), Narrow.KRC)>;
3219
3220// Commuted with broadcast load.
3221def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3222                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3223          (COPY_TO_REGCLASS
3224           (!cast<Instruction>(InstStr#"Zrmbi")
3225            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3226            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3227
3228def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3229                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3230                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3231          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3232           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3233           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3234           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3235}
3236
// NoVLX lowering for 32/64-bit element compares: 128- and 256-bit vectors are
// compared with the 512-bit VPCMP{D,UD,Q,UQ} and VCMPP{S,D} instructions.
// Signed compares use X86pcmpm, unsigned compares use X86pcmpum. The integer
// types additionally get the broadcast-load (rmb) patterns.
3237let Predicates = [HasAVX512, NoVLX] in {
3238  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3239  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3240
3241  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3242  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3243
3244  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3245  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3246
3247  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3248  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3249
3250  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3251  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3252
3253  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3254  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3255
3256  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3257  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3258
3259  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3260  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3261
3262  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3263  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3264  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3265  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3266}
3267
// NoVLX lowering for 8/16-bit element compares, which additionally require
// BWI: 128- and 256-bit vectors are compared with the 512-bit
// VPCMP{B,UB,W,UW} instructions. Only the register/register patterns are
// instantiated here.
3268let Predicates = [HasBWI, NoVLX] in {
3269  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3270  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3271
3272  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3273  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3274
3275  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3276  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3277
3278  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3279  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3280}
3281
3282// Mask setting all 0s or 1s
// Defines one pseudo-instruction (opcode 0, empty asm string, no inputs) that
// materializes the constant `Val` of type VT into a KRC register. The pseudo
// is marked rematerializable and as cheap as a move, uses the WriteZero
// scheduling class, and requires HasAVX512.
3283multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3284  let Predicates = [HasAVX512] in
3285    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3286        SchedRW = [WriteZero] in
3287      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3288                     [(set KRC:$dst, (VT Val))]>;
3289}
3290
// Instantiates the set pseudo for the 16-, 32- and 64-bit mask types
// (suffixes W, D, Q). There is no 8-bit variant.
3291multiclass avx512_mask_setop_w<PatFrag Val> {
3292  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3293  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3294  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3295}
3296
// KSET0{W,D,Q}: all-zeros mask; KSET1{W,D,Q}: all-ones mask.
3297defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3298defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3299
3300// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros and all-ones constants of type v8i1/v4i1/v2i1/v1i1 are produced
// by the 16-bit KSET0W/KSET1W pseudo and copied to the narrower mask class.
3301let Predicates = [HasAVX512] in {
3302  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3303  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3304  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3305  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3306  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3307  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3308  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3309  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3310}
3311
3312// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// For a (sub, full) pair of mask classes/types, both operations at index 0
// are selected as a plain register-class copy:
//   extract_subvector(full, 0)      -> copy RC to subRC
//   insert_subvector(undef, sub, 0) -> copy subRC to RC
3313multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3314                                             RegisterClass RC, ValueType VT> {
3315  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3316            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3317
3318  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3319            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3320}
// Instantiated for every (narrower, wider) pair of mask types among
// v1i1, v2i1, v4i1, v8i1, v16i1, v32i1 and v64i1.
3321defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3322defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3323defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3324defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3325defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3326defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3327
3328defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3329defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3330defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3331defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3332defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3333
3334defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3335defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3336defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3337defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3338
3339defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3340defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3341defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3342
3343defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3344defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3345
3346defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3347
3348//===----------------------------------------------------------------------===//
3349// AVX-512 - Aligned and unaligned load and store
3350//
3351
// Defines the load/move instruction family for one vector type `_`:
//   rr   : register move, no pattern (isMoveReg = 1).
//   rrkz : zero-masked register move; selects SelectOprr(mask, src, zeros).
//   rm   : plain load; selects ld_frag unless NoRMPattern is set, in which
//          case the def has an empty pattern.
//   rrk  : merge-masked register move; $src0 is tied to $dst and supplies
//          the pass-through value for SelectOprr.
//   rmk  : merge-masked load; same tie, selected via vselect_mask.
//   rmkz : zero-masked load, selected via vselect_mask with zeros.
// The three trailing Pats map the masked-load fragment `mload` onto rmkz
// (pass-through undef or all-zeros) and rmk (pass-through in a register).
// `Name` is used together with _.ZSuffix to look up those instructions, and
// EVEX2VEXOvrd is the prefix of the EVEX2VEXOverride names attached to rr/rm.
// SelectOprr defaults to vselect and is only used by the register forms.
3352multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3353                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3354                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3355                       bit NoRMPattern = 0,
3356                       SDPatternOperator SelectOprr = vselect> {
3357  let hasSideEffects = 0 in {
3358  let isMoveReg = 1 in
3359  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3360                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3361                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3362                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3363  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3364                      (ins _.KRCWM:$mask,  _.RC:$src),
3365                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3366                       "${dst} {${mask}} {z}, $src}"),
3367                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3368                                           (_.VT _.RC:$src),
3369                                           _.ImmAllZerosV)))], _.ExeDomain>,
3370                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3371
3372  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3373  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3374                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3375                    !if(NoRMPattern, [],
3376                        [(set _.RC:$dst,
3377                          (_.VT (ld_frag addr:$src)))]),
3378                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3379                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3380
3381  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3382    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3383                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3384                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3385                      "${dst} {${mask}}, $src1}"),
3386                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3387                                          (_.VT _.RC:$src1),
3388                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3389                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3390    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3391                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3392                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3393                      "${dst} {${mask}}, $src1}"),
3394                     [(set _.RC:$dst, (_.VT
3395                         (vselect_mask _.KRCWM:$mask,
3396                          (_.VT (ld_frag addr:$src1)),
3397                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3398                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3399  }
3400  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3401                  (ins _.KRCWM:$mask, _.MemOp:$src),
3402                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3403                                "${dst} {${mask}} {z}, $src}",
3404                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3405                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3406                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3407  }
3408  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3409            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3410
3411  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3412            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3413
3414  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3415            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3416             _.KRCWM:$mask, addr:$ptr)>;
3417}
3418
// Instantiates avx512_load for an aligned packed move at the three vector
// widths bundled in the AVX512VLVectorVTInfo `_`:
//   Z    - info512, predicated on [prd] only; empty EVEX2VEXOvrd string.
//   Z256 - info256, predicated on [prd, HasVLX]; EVEX2VEXOvrd # "Y".
//   Z128 - info128, predicated on [prd, HasVLX]; EVEX2VEXOvrd unchanged.
// Each width passes its own AlignedLdFrag together with masked_load_aligned
// (presumably bound to avx512_load's ld_frag/mload parameters - the
// avx512_load header is outside this chunk, TODO confirm).
// NoRMPattern is forwarded unchanged. No SelectOprr argument is passed, so
// avx512_load's own default for that parameter applies.
3419multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3420                                 AVX512VLVectorVTInfo _, Predicate prd,
3421                                 X86SchedWriteMoveLSWidths Sched,
3422                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3423  let Predicates = [prd] in
3424  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3425                       _.info512.AlignedLdFrag, masked_load_aligned,
3426                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3427
3428  let Predicates = [prd, HasVLX] in {
3429  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3430                          _.info256.AlignedLdFrag, masked_load_aligned,
3431                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3432  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3433                          _.info128.AlignedLdFrag, masked_load_aligned,
3434                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3435  }
3436}
3437
// Unaligned counterpart of avx512_alignedload_vl: instantiates avx512_load at
// 512/256/128 bits using each width's LdFrag together with masked_load.
//   Z    - info512, predicated on [prd]; empty EVEX2VEXOvrd string.
//   Z256 - info256, predicated on [prd, HasVLX]; EVEX2VEXOvrd # "Y".
//   Z128 - info128, predicated on [prd, HasVLX]; EVEX2VEXOvrd unchanged.
// NoRMPattern and SelectOprr are forwarded to every width. SelectOprr
// defaults to vselect in this wrapper.
3438multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3439                          AVX512VLVectorVTInfo _, Predicate prd,
3440                          X86SchedWriteMoveLSWidths Sched,
3441                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
3442                          SDPatternOperator SelectOprr = vselect> {
3443  let Predicates = [prd] in
3444  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3445                       masked_load, Sched.ZMM, "",
3446                       NoRMPattern, SelectOprr>, EVEX_V512;
3447
3448  let Predicates = [prd, HasVLX] in {
3449  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3450                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3451                         NoRMPattern, SelectOprr>, EVEX_V256;
3452  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3453                         masked_load, Sched.XMM, EVEX2VEXOvrd,
3454                         NoRMPattern, SelectOprr>, EVEX_V128;
3455  }
3456}
3457
// Store side of a packed move for one vector type `_`.
//
// Parameters:
//   opc          - opcode byte shared by every record defined here.
//   OpcodeStr    - mnemonic used in the asm strings and in the ".s" aliases.
//   BaseName     - record-name prefix; combined with _.ZSuffix to refer to the
//                  load-side records (FoldGenData) and to the records defined
//                  here (Pat / InstAlias).
//   st_frag      - fragment matched by the unmasked `mr` store pattern.
//   mstore       - fragment matched by the masked-store Pat (selects `mrk`).
//   Sched        - provides Sched.RR for the register forms and Sched.MR for
//                  the memory forms.
//   EVEX2VEXOvrd - prefix for the EVEX2VEXOverride names on rr_REV and mr.
//   NoMRPattern  - when set, `mr` is defined with an empty pattern list.
//
// Records defined: rr_REV, rrk_REV, rrkz_REV (MRMDestReg register forms with
// empty patterns), mr, mrk, one masked-store Pat and three ".s" InstAliases.
3458multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3459                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3460                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3461                        bit NoMRPattern = 0> {
      // Register-to-register forms using the store opcode (MRMDestReg). They
      // are isCodeGenOnly + ForceDisassemble and carry no selection pattern;
      // FoldGenData names the corresponding non-_REV record.
3462  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
      // This brace-less `let` applies to rr_REV only.
3463  let isMoveReg = 1 in
3464  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3465                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3466                         [], _.ExeDomain>, EVEX,
3467                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3468                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3469  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3470                         (ins _.KRCWM:$mask, _.RC:$src),
3471                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3472                         "${dst} {${mask}}, $src}",
3473                         [], _.ExeDomain>,  EVEX, EVEX_K,
3474                         FoldGenData<BaseName#_.ZSuffix#rrk>,
3475                         Sched<[Sched.RR]>;
3476  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3477                          (ins _.KRCWM:$mask, _.RC:$src),
3478                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3479                          "${dst} {${mask}} {z}, $src}",
3480                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3481                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
3482                          Sched<[Sched.RR]>;
3483  }
3484
      // Unmasked store; pattern is (st_frag src, addr) unless NoMRPattern.
      // NOTE(review): this brace-less `let` covers only `mr`. `mrk` below sets
      // neither hasSideEffects nor mayStore explicitly and has an empty
      // pattern list - confirm the flags are inferred as intended.
3485  let hasSideEffects = 0, mayStore = 1 in
3486  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3487                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3488                    !if(NoMRPattern, [],
3489                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3490                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3491                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
      // Masked store; selected through the Pat that follows, not through an
      // inline pattern. Marked NotMemoryFoldable.
3492  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3493                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3494              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3495               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3496               NotMemoryFoldable;
3497
      // Map the masked-store fragment onto mrk.
3498  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3499           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3500                                                        _.KRCWM:$mask, _.RC:$src)>;
3501
      // "<mnemonic>.s" spellings resolve to the _REV register forms (plain,
      // merge-masked and zero-masked). The trailing 0 is the InstAlias's last
      // argument (presumably the emit/print priority - TODO confirm).
3502  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3503                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3504                   _.RC:$dst, _.RC:$src), 0>;
3505  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3506                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3507                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3508  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3509                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3510                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3511}
3512
// Instantiates avx512_store for an unaligned packed move at 512/256/128 bits,
// using `store` for the unmasked pattern and `masked_store` for the masked
// one.
//   Z    - info512, predicated on [prd]; empty EVEX2VEXOvrd string.
//   Z256 - info256, predicated on [prd, HasVLX]; EVEX2VEXOvrd # "Y".
//   Z128 - info128, predicated on [prd, HasVLX]; EVEX2VEXOvrd unchanged.
// NoMRPattern is forwarded unchanged to every width.
3513multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3514                            AVX512VLVectorVTInfo _, Predicate prd,
3515                            X86SchedWriteMoveLSWidths Sched,
3516                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3517  let Predicates = [prd] in
3518  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3519                        masked_store, Sched.ZMM, "",
3520                        NoMRPattern>, EVEX_V512;
3521  let Predicates = [prd, HasVLX] in {
3522    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3523                             masked_store, Sched.YMM,
3524                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3525    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3526                             masked_store, Sched.XMM, EVEX2VEXOvrd,
3527                             NoMRPattern>, EVEX_V128;
3528  }
3529}
3530
// Aligned counterpart of avx512_store_vl: instantiates avx512_store at
// 512/256/128 bits using `alignedstore` for the unmasked pattern and
// `masked_store_aligned` for the masked one.
//   Z    - info512, predicated on [prd]; empty EVEX2VEXOvrd string.
//   Z256 - info256, predicated on [prd, HasVLX]; EVEX2VEXOvrd # "Y".
//   Z128 - info128, predicated on [prd, HasVLX]; EVEX2VEXOvrd unchanged.
// NoMRPattern is forwarded unchanged to every width.
3531multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3532                                  AVX512VLVectorVTInfo _, Predicate prd,
3533                                  X86SchedWriteMoveLSWidths Sched,
3534                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3535  let Predicates = [prd] in
3536  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3537                        masked_store_aligned, Sched.ZMM, "",
3538                        NoMRPattern>, EVEX_V512;
3539
3540  let Predicates = [prd, HasVLX] in {
3541    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3542                             masked_store_aligned, Sched.YMM,
3543                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3544    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3545                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3546                             NoMRPattern>, EVEX_V128;
3547  }
3548}
3549
// Packed floating-point moves. Each defm combines a load multiclass and a
// store multiclass under one name, so the Z/Z256/Z128 records of both
// directions share the same prefix.
//   VMOVAPS / VMOVAPD - aligned forms: load opcode 0x28, store opcode 0x29.
//   VMOVUPS / VMOVUPD - unaligned forms: load opcode 0x10, store opcode 0x11.
//                       These pass null_frag as SelectOprr (presumably so the
//                       register select patterns are only attached to the
//                       aligned forms - TODO confirm).
// The single-precision defms use PS and EVEX_CD8<32, CD8VF>; the
// double-precision defms use PD, VEX_W and EVEX_CD8<64, CD8VF>.
3550defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3551                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3552               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3553                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3554               PS, EVEX_CD8<32, CD8VF>;
3555
3556defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3557                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3558               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3559                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3560               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3561
3562defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3563                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3564               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3565                               SchedWriteFMoveLS, "VMOVUPS">,
3566                               PS, EVEX_CD8<32, CD8VF>;
3567
3568defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3569                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3570               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3571                               SchedWriteFMoveLS, "VMOVUPD">,
3572               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3573
// Packed integer moves: load opcode 0x6F and store opcode 0x7F throughout.
// A trailing `1` sets NoRMPattern / NoMRPattern, so VMOVDQA32, VMOVDQU8,
// VMOVDQU16 and VMOVDQU32 get no unmasked load/store selection pattern from
// the multiclasses; VMOVDQA64 and VMOVDQU64 keep theirs.
// VMOVDQU8 / VMOVDQU16 are predicated on HasBWI, the others on HasAVX512.
// VMOVDQU32 / VMOVDQU64 pass null_frag as SelectOprr; VMOVDQU8 / VMOVDQU16
// leave it at the avx512_load_vl default.
// Prefix/width selection: aligned forms use PD, byte/word unaligned forms use
// XD, dword/qword unaligned forms use XS; VEX_W is added on the 64-bit
// aligned/unaligned forms and on VMOVDQU16.
3574defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3575                                       HasAVX512, SchedWriteVecMoveLS,
3576                                       "VMOVDQA", 1>,
3577                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3578                                        HasAVX512, SchedWriteVecMoveLS,
3579                                        "VMOVDQA", 1>,
3580                 PD, EVEX_CD8<32, CD8VF>;
3581
3582defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3583                                       HasAVX512, SchedWriteVecMoveLS,
3584                                       "VMOVDQA">,
3585                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3586                                        HasAVX512, SchedWriteVecMoveLS,
3587                                        "VMOVDQA">,
3588                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3589
3590defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3591                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
3592                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3593                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3594                XD, EVEX_CD8<8, CD8VF>;
3595
3596defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3597                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3598                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3599                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3600                 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3601
3602defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3603                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3604                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3605                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3606                 XS, EVEX_CD8<32, CD8VF>;
3607
3608defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3609                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3610                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3611                                 SchedWriteVecMoveLS, "VMOVDQU">,
3612                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3613
3614// Special instructions to help with spilling when we don't have VLX. We need
3615// to load or store from a ZMM register instead. These are converted in
3616// expandPostRAPseudos.
// Load pseudos: opcode 0, Pseudo format, empty asm string, no selection
// pattern. Names mirror the aligned (VMOVAPS) and unaligned (VMOVUPS)
// 128-/256-bit register-from-memory records; the 128-bit ones are scheduled as
// WriteFLoadX and the 256-bit ones as WriteFLoadY.
3617let isReMaterializable = 1, canFoldAsLoad = 1,
3618    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3619def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3620                            "", []>, Sched<[WriteFLoadX]>;
3621def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3622                            "", []>, Sched<[WriteFLoadY]>;
3623def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3624                            "", []>, Sched<[WriteFLoadX]>;
3625def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3626                            "", []>, Sched<[WriteFLoadY]>;
3627}
3628
// Store pseudos: same shape as the loads above, with the memory operand as
// $dst and the vector register as $src; scheduled as WriteFStoreX /
// WriteFStoreY.
3629let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3630def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3631                            "", []>, Sched<[WriteFStoreX]>;
3632def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3633                            "", []>, Sched<[WriteFStoreY]>;
3634def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3635                            "", []>, Sched<[WriteFStoreX]>;
3636def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3637                            "", []>, Sched<[WriteFStoreY]>;
3638}
3639
// Select "zero where the mask bit is set, $src elsewhere" for 512-bit integer
// vectors. The zero-masking register move zeroes the lanes whose mask bit is
// clear, so the mask is inverted with KNOTWrr first.
// The v8i64 form copies the mask to VK16 around KNOTWrr and then back to VK8.
3640def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3641                          (v8i64 VR512:$src))),
3642   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3643                                              VK8), VR512:$src)>;
3644
3645def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3646                           (v16i32 VR512:$src))),
3647                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3648
3649// These patterns exist to prevent the above patterns from introducing a second
3650// mask inversion when one already exists.
// The inversion is matched as (xor mask, all-ones) and the un-inverted mask is
// used directly.
3651def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3652                          (v8i64 immAllZerosV),
3653                          (v8i64 VR512:$src))),
3654                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): the source side names the mask VK16 but the result side spells
// it VK16WM, unlike the v8i64 pattern above which uses VK8 on both sides -
// confirm this is intentional.
3655def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3656                           (v16i32 immAllZerosV),
3657                           (v16i32 VR512:$src))),
3658                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3659
// Lowers a masked select on a Narrow vector type by performing the masked
// register move on the Wide type and extracting the low part again.
//
// Parameters:
//   InstrStr - name prefix of the Wide instruction; "rrk" / "rrkz" is appended.
//   Narrow   - type info of the vector being selected.
//   Wide     - type info of the vector the instruction actually operates on.
//
// Both patterns widen each Narrow register operand with INSERT_SUBREG into an
// IMPLICIT_DEF (so the upper part is undefined), copy the mask from
// Narrow.KRCWM to Wide.KRCWM, and take Narrow.SubRegIdx of the result.
3660multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3661                              X86VectorVTInfo Wide> {
     // Merge form: $src1 where the mask is set, $src0 otherwise. $src0 becomes
     // the first (pass-through) operand of the rrk instruction.
3662 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3663                               Narrow.RC:$src1, Narrow.RC:$src0)),
3664           (EXTRACT_SUBREG
3665            (Wide.VT
3666             (!cast<Instruction>(InstrStr#"rrk")
3667              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3668              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3669              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3670            Narrow.SubRegIdx)>;
3671
     // Zeroing form: $src1 where the mask is set, zero otherwise.
3672 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3673                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3674           (EXTRACT_SUBREG
3675            (Wide.VT
3676             (!cast<Instruction>(InstrStr#"rrkz")
3677              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3678              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3679            Narrow.SubRegIdx)>;
3680}
3681
3682// Patterns for handling masked selects of 128-bit and 256-bit vectors when
3683// VLX isn't available. Use a 512-bit operation and extract.
// 32-bit elements widen to the 16-element 512-bit types and 64-bit elements
// to the 8-element ones; floating-point types use the VMOVAPS/VMOVAPD
// instructions and integer types the VMOVDQA32/VMOVDQA64 ones.
3684let Predicates = [HasAVX512, NoVLX] in {
3685  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3686  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3687  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3688  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3689
3690  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3691  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3692  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3693  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3694}
3695
// Byte and word element types need BWI and use the VMOVDQU8/VMOVDQU16
// instructions on the 512-bit types.
3696let Predicates = [HasBWI, NoVLX] in {
3697  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3698  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3699
3700  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3701  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3702}
3703
// Unmasked 512-bit integer loads and stores for the v16i32, v32i16 and v64i8
// types. All of them are routed to the 64-bit-element instructions:
// VMOVDQA64Z* for the aligned fragments and VMOVDQU64Z* for the unaligned
// ones. (VMOVDQA32 and VMOVDQU8/16/32 are instantiated with their own
// unmasked patterns disabled.)
3704let Predicates = [HasAVX512] in {
3705  // 512-bit load.
3706  def : Pat<(alignedloadv16i32 addr:$src),
3707            (VMOVDQA64Zrm addr:$src)>;
3708  def : Pat<(alignedloadv32i16 addr:$src),
3709            (VMOVDQA64Zrm addr:$src)>;
3710  def : Pat<(alignedloadv64i8 addr:$src),
3711            (VMOVDQA64Zrm addr:$src)>;
3712  def : Pat<(loadv16i32 addr:$src),
3713            (VMOVDQU64Zrm addr:$src)>;
3714  def : Pat<(loadv32i16 addr:$src),
3715            (VMOVDQU64Zrm addr:$src)>;
3716  def : Pat<(loadv64i8 addr:$src),
3717            (VMOVDQU64Zrm addr:$src)>;
3718
3719  // 512-bit store.
3720  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3721            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3722  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3723            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3724  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3725            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3726  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3727            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3728  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3729            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3730  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3731            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3732}
3733
// Unmasked 128-bit and 256-bit integer loads and stores for the 32-, 16- and
// 8-bit element types, available with VLX. As in the 512-bit case they are
// routed to the 64-bit-element instructions: VMOVDQA64Z128/Z256 for the
// aligned fragments and VMOVDQU64Z128/Z256 for the unaligned ones.
3734let Predicates = [HasVLX] in {
3735  // 128-bit load.
3736  def : Pat<(alignedloadv4i32 addr:$src),
3737            (VMOVDQA64Z128rm addr:$src)>;
3738  def : Pat<(alignedloadv8i16 addr:$src),
3739            (VMOVDQA64Z128rm addr:$src)>;
3740  def : Pat<(alignedloadv16i8 addr:$src),
3741            (VMOVDQA64Z128rm addr:$src)>;
3742  def : Pat<(loadv4i32 addr:$src),
3743            (VMOVDQU64Z128rm addr:$src)>;
3744  def : Pat<(loadv8i16 addr:$src),
3745            (VMOVDQU64Z128rm addr:$src)>;
3746  def : Pat<(loadv16i8 addr:$src),
3747            (VMOVDQU64Z128rm addr:$src)>;
3748
3749  // 128-bit store.
3750  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3751            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3752  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3753            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3754  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3755            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3756  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3757            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3758  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3759            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3760  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3761            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3762
3763  // 256-bit load.
3764  def : Pat<(alignedloadv8i32 addr:$src),
3765            (VMOVDQA64Z256rm addr:$src)>;
3766  def : Pat<(alignedloadv16i16 addr:$src),
3767            (VMOVDQA64Z256rm addr:$src)>;
3768  def : Pat<(alignedloadv32i8 addr:$src),
3769            (VMOVDQA64Z256rm addr:$src)>;
3770  def : Pat<(loadv8i32 addr:$src),
3771            (VMOVDQU64Z256rm addr:$src)>;
3772  def : Pat<(loadv16i16 addr:$src),
3773            (VMOVDQU64Z256rm addr:$src)>;
3774  def : Pat<(loadv32i8 addr:$src),
3775            (VMOVDQU64Z256rm addr:$src)>;
3776
3777  // 256-bit store.
3778  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3779            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3780  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3781            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3782  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3783            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3784  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3785            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3786  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3787            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3788  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3789            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3790}
3791
3792// Move Int Doubleword to Packed Double Int
3793//
// EVEX-encoded vmovd/vmovq between general-purpose registers (or memory) and
// the low element of an XMM register, all in the SSEPackedInt domain.
//   VMOVDI2PDIZrr / rm - GR32 or i32mem into v4i32 via scalar_to_vector.
//   VMOV64toPQIZrr     - GR64 into v2i64 via scalar_to_vector.
//   VMOV64toPQIZrm     - disassembler-only i64mem form (no pattern).
//   VMOV64toSDZrr      - codegen-only bitconvert GR64 -> FR64X.
//   VMOVSDto64Zrr      - codegen-only bitconvert FR64X -> GR64.
3794let ExeDomain = SSEPackedInt in {
3795def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3796                      "vmovd\t{$src, $dst|$dst, $src}",
3797                      [(set VR128X:$dst,
3798                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3799                        EVEX, Sched<[WriteVecMoveFromGpr]>;
3800def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3801                      "vmovd\t{$src, $dst|$dst, $src}",
3802                      [(set VR128X:$dst,
3803                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3804                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3805def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3806                      "vmovq\t{$src, $dst|$dst, $src}",
3807                        [(set VR128X:$dst,
3808                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3809                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3810let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3811def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3812                      (ins i64mem:$src),
3813                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3814                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3815let isCodeGenOnly = 1 in {
3816def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3817                       "vmovq\t{$src, $dst|$dst, $src}",
3818                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3819                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// This record moves FR64X into GR64 (opcode 0x7E, MRMDestReg), so it uses the
// vector-to-GPR scheduling class, matching VMOVSS2DIZrr and VMOVPQIto64Zrr.
3820def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3821                         "vmovq\t{$src, $dst|$dst, $src}",
3822                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3823                         EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
3824}
3825} // ExeDomain = SSEPackedInt
3826
3827// Move Int Doubleword to Single Scalar
3828//
// Codegen-only vmovd (opcode 0x6E) that implements the GR32 -> FR32X
// bitconvert; scheduled as a GPR-to-vector move.
3829let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3830def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3831                      "vmovd\t{$src, $dst|$dst, $src}",
3832                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3833                      EVEX, Sched<[WriteVecMoveFromGpr]>;
3834} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3835
3836// Move doubleword from xmm register to r/m32
3837//
// Both records match element 0 of a v4i32 (extractelt ..., (iPTR 0)): the rr
// form writes it to a GR32, the mr form stores it to i32mem. The memory form
// declares EVEX_CD8<32, CD8VT1>.
3838let ExeDomain = SSEPackedInt in {
3839def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3840                       "vmovd\t{$src, $dst|$dst, $src}",
3841                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3842                                        (iPTR 0)))]>,
3843                       EVEX, Sched<[WriteVecMoveToGpr]>;
3844def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3845                       (ins i32mem:$dst, VR128X:$src),
3846                       "vmovd\t{$src, $dst|$dst, $src}",
3847                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3848                                     (iPTR 0))), addr:$dst)]>,
3849                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3850} // ExeDomain = SSEPackedInt
3851
3852// Move quadword from xmm1 register to r/m64
3853//
// EVEX-encoded vmovq out of the low quadword of an XMM register.
//   VMOVPQIto64Zrr - opcode 0x7E, element 0 of v2i64 into GR64.
//   VMOVPQIto64Zmr - opcode 0x7E memory form; disassembler-only, no pattern.
//   VMOVPQI2QIZmr  - opcode 0xD6 store of element 0 of v2i64 to i64mem.
//   VMOVPQI2QIZrr  - opcode 0xD6 register form; disassembler-only, no pattern.
3854let ExeDomain = SSEPackedInt in {
3855def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3856                      "vmovq\t{$src, $dst|$dst, $src}",
3857                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3858                                                   (iPTR 0)))]>,
3859                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3860                      Requires<[HasAVX512]>;
3861
// The memory form needs EVEX_CD8<64, CD8VT1> like the other 64-bit scalar
// memory forms in this file, so that a compressed 8-bit displacement is scaled
// by the 8-byte access size.
3862let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3863def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3864                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3865                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3866                      Requires<[HasAVX512, In64BitMode]>;
3867
3868def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3869                      (ins i64mem:$dst, VR128X:$src),
3870                      "vmovq\t{$src, $dst|$dst, $src}",
3871                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3872                              addr:$dst)]>,
3873                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3874                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3875
3876let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3877def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3878                             (ins VR128X:$src),
3879                             "vmovq\t{$src, $dst|$dst, $src}", []>,
3880                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3881} // ExeDomain = SSEPackedInt
3882
// "vmovq.s" spelling for the 0xD6 register-to-register form VMOVPQI2QIZrr.
// The trailing 0 is the InstAlias's last argument (presumably the emit/print
// priority - TODO confirm).
3883def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3884                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3885
// Select the X86vextractstore64 node on a v2i64 source with the 0xD6 vmovq
// store form.
3886let Predicates = [HasAVX512] in {
3887  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3888            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3889}
3890
3891// Move Scalar Single to Double Int
3892//
// Codegen-only vmovd (opcode 0x7E, MRMDestReg) that implements the
// FR32X -> GR32 bitconvert; scheduled as a vector-to-GPR move.
3893let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3894def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3895                      (ins FR32X:$src),
3896                      "vmovd\t{$src, $dst|$dst, $src}",
3897                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3898                      EVEX, Sched<[WriteVecMoveToGpr]>;
3899} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3900
3901// Move Quadword Int to Packed Quadword Int
3902//
// XS-prefixed vmovq load (opcode 0x7E, MRMSrcMem): loads an i64 from memory
// into a v2i64 via scalar_to_vector.
// NOTE(review): the displacement scaling is spelled EVEX_CD8<8, CD8VT8> here,
// while the other 64-bit scalar memory forms in this file use
// EVEX_CD8<64, CD8VT1>. Presumably both describe an 8-byte access - confirm.
3903let ExeDomain = SSEPackedInt in {
3904def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3905                      (ins i64mem:$src),
3906                      "vmovq\t{$src, $dst|$dst, $src}",
3907                      [(set VR128X:$dst,
3908                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3909                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3910} // ExeDomain = SSEPackedInt
3911
3912// Allow "vmovd" but print "vmovq".
// Covers both directions of the 64-bit GPR <-> XMM move: GR64 into VR128X
// (VMOV64toPQIZrr) and VR128X into GR64 (VMOVPQIto64Zrr). Both aliases pass 0
// as the last InstAlias argument.
3913def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3914                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3915def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3916                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3917
3918// Conversions between masks and scalar fp.
// Each bitconvert goes through a general-purpose register: the scalar FP
// value is moved to a GPR (VMOVSS2DIZrr / VMOVSDto64Zrr) and from there into
// the mask register (KMOVDkr / KMOVQkr), or the reverse path with
// KMOVDrk / KMOVQrk followed by VMOVDI2SSZrr / VMOV64toSDZrr.
3919def : Pat<(v32i1 (bitconvert FR32X:$src)),
3920          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3921def : Pat<(f32 (bitconvert VK32:$src)),
3922          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3923
3924def : Pat<(v64i1 (bitconvert FR64X:$src)),
3925          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3926def : Pat<(f64 (bitconvert VK64:$src)),
3927          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3928
3929//===----------------------------------------------------------------------===//
3930// AVX-512  MOVSS, MOVSD
3931//===----------------------------------------------------------------------===//
3932
// Defines the EVEX scalar move family for one scalar type `_`.
//
// Parameters:
//   asm         - mnemonic used in every asm string.
//   OpNode      - node matched by the register forms (rr, rrkz, rrk).
//   vzload_frag - load fragment matched by the vector-register `rm` form.
//
// Records defined: rr, rrkz, rrk (opcode 0x10, register), rm, rm_alt, rmk,
// rmkz (opcode 0x10, memory source), mr, mrk (opcode 0x11, memory
// destination).
3933multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3934                              X86VectorVTInfo _> {
      // Unmasked register form. This brace-less `let` restricts only `rr` to
      // [HasAVX512, OptForSize].
3935  let Predicates = [HasAVX512, OptForSize] in
3936  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3937             (ins _.RC:$src1, _.RC:$src2),
3938             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3939             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3940             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
      // Zero-masked register form: X86selects between the OpNode result and
      // an all-zeros vector.
3941  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3942              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3943              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3944              "$dst {${mask}} {z}, $src1, $src2}"),
3945              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3946                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3947                                      _.ImmAllZerosV)))],
3948              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
      // Merge-masked register form: $src0 is tied to $dst and supplies the
      // value selected when the mask is clear.
3949  let Constraints = "$src0 = $dst"  in
3950  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3951             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3952             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3953             "$dst {${mask}}, $src1, $src2}"),
3954             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3955                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3956                                     (_.VT _.RC:$src0))))],
3957             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
      // Unmasked loads: `rm` produces the vector register class via
      // vzload_frag, `rm_alt` produces the scalar FRC class via ScalarLdFrag.
3958  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3959  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3960             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3961             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3962             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3963  // _alt version uses FR32/FR64 register class.
3964  let isCodeGenOnly = 1 in
3965  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3966                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3967                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3968                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3969  }
      // Masked loads carry no pattern here, so mayLoad/hasSideEffects are set
      // explicitly. Only `rmk` has the tied pass-through operand.
3970  let mayLoad = 1, hasSideEffects = 0 in {
3971    let Constraints = "$src0 = $dst" in
3972    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3973               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3974               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3975               "$dst {${mask}}, $src}"),
3976               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3977    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3978               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3979               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3980               "$dst {${mask}} {z}, $src}"),
3981               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3982  }
      // Unmasked scalar store of an FRC register.
3983  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3984             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3985             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3986             EVEX, Sched<[WriteFStore]>;
      // Masked store: no pattern, mask class is VK1WM, source is the vector
      // register class; marked NotMemoryFoldable.
3987  let mayStore = 1, hasSideEffects = 0 in
3988  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3989              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3990              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3991              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3992              NotMemoryFoldable;
3993}
3994
// Instantiate the scalar-move multiclass once for vmovss (f32x_info, using
// X86Movss / X86vzload32) and once for vmovsd (f64x_info, using X86Movsd /
// X86vzload64). The defm name is prepended to each record the multiclass
// defines (rm, rm_alt, rmk, rmkz, mr, mrk, ...), which yields the names such
// as VMOVSSZrm and VMOVSDZrmk that later patterns in this file refer to.
3995defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3996                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3997
3998defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3999                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4000
4001
// Selection patterns that fold a scalar X86selects feeding OpNode into the
// masked register-to-register move whose record name starts with InstrStr.
//   InstrStr - record-name prefix; "rrk" / "rrkz" is appended and the result
//              is looked up with !cast<Instruction>.
//   OpNode   - vector node whose second operand is the scalar_to_vector of
//              the selected element.
//   ZeroFP   - leaf matched as the third X86selects operand in the zeroing
//              pattern.
//   _        - vector type info supplying VT, EltVT, RC and FRC.
4002multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4003                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
4004
// Merge form: X86selects chooses between two scalar registers. The output
// passes, in order: $src2 (third X86selects operand) copied into _.RC, the
// mask, $src0, and $src1 copied into _.RC.
// NOTE(review): the rrk operand list is defined outside this excerpt; the
// first operand is presumably the tied pass-through - confirm there.
4005def : Pat<(_.VT (OpNode _.RC:$src0,
4006                        (_.VT (scalar_to_vector
4007                                  (_.EltVT (X86selects VK1WM:$mask,
4008                                                       (_.EltVT _.FRC:$src1),
4009                                                       (_.EltVT _.FRC:$src2))))))),
4010          (!cast<Instruction>(InstrStr#rrk)
4011                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4012                        VK1WM:$mask,
4013                        (_.VT _.RC:$src0),
4014                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4015
// Zeroing form: the third X86selects operand is ZeroFP, so the rrkz record is
// selected and only the mask, $src0 and the copied $src1 are passed.
4016def : Pat<(_.VT (OpNode _.RC:$src0,
4017                        (_.VT (scalar_to_vector
4018                                  (_.EltVT (X86selects VK1WM:$mask,
4019                                                       (_.EltVT _.FRC:$src1),
4020                                                       (_.EltVT ZeroFP))))))),
4021          (!cast<Instruction>(InstrStr#rrkz)
4022                        VK1WM:$mask,
4023                        (_.VT _.RC:$src0),
4024                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4025}
4026
// Selects the masked scalar store InstrStr#mrk for a 512-bit masked_store
// whose stored value is a 128-bit vector inserted at index 0 of undef.
//   InstrStr - record-name prefix; "mrk" is appended.
//   _        - 128/256/512-bit type-info bundle; info128 and info512 are used.
//   Mask     - dag matched as the masked_store mask operand. The output refers
//              to MaskRC:$mask, so Mask is expected to bind $mask.
//   MaskRC   - register class of $mask; the register is copied directly to
//              VK1WM.
4027multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4028                                        dag Mask, RegisterClass MaskRC> {
4029
4030def : Pat<(masked_store
4031             (_.info512.VT (insert_subvector undef,
4032                               (_.info128.VT _.info128.RC:$src),
4033                               (iPTR 0))), addr:$dst, Mask),
4034          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4035                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4036                      _.info128.RC:$src)>;
4037
4038}
4039
// Same source pattern as avx512_store_scalar_lowering (512-bit masked_store of
// a 128-bit vector inserted at index 0 of undef), but the mask register is
// first placed into an undefined i32 with INSERT_SUBREG at `subreg` before
// being copied to VK1WM.
//   InstrStr - record-name prefix; "mrk" is appended.
//   Mask     - dag matched as the mask operand; expected to bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted into the i32.
4040multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4041                                               AVX512VLVectorVTInfo _,
4042                                               dag Mask, RegisterClass MaskRC,
4043                                               SubRegIndex subreg> {
4044
4045def : Pat<(masked_store
4046             (_.info512.VT (insert_subvector undef,
4047                               (_.info128.VT _.info128.RC:$src),
4048                               (iPTR 0))), addr:$dst, Mask),
4049          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4050                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4051                      _.info128.RC:$src)>;
4052
4053}
4054
4055// This matches the more recent codegen from clang that avoids emitting a 512
4056// bit masked store directly. Codegen will widen 128-bit masked store to 512
4057// bits on AVX512F only targets.
// Two patterns are emitted, both selecting InstrStr#mrk:
//   Mask512 - mask dag for the 512-bit masked_store form (value inserted at
//             index 0 of undef).
//   Mask128 - mask dag for the direct 128-bit masked_store form.
// In both, MaskRC:$mask is inserted into an undefined i32 at `subreg` and the
// result is copied to VK1WM, so both mask dags are expected to bind $mask.
4058multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4059                                               AVX512VLVectorVTInfo _,
4060                                               dag Mask512, dag Mask128,
4061                                               RegisterClass MaskRC,
4062                                               SubRegIndex subreg> {
4063
4064// AVX512F pattern.
4065def : Pat<(masked_store
4066             (_.info512.VT (insert_subvector undef,
4067                               (_.info128.VT _.info128.RC:$src),
4068                               (iPTR 0))), addr:$dst, Mask512),
4069          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4070                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4071                      _.info128.RC:$src)>;
4072
4073// AVX512VL pattern.
4074def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4075          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4076                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4077                      _.info128.RC:$src)>;
4078}
4079
// Selects the masked scalar loads InstrStr#rmkz / InstrStr#rmk for the low
// 128 bits (extract_subvector at index 0) of a 512-bit masked_load.
//   InstrStr - record-name prefix; "rmkz" / "rmk" is appended.
//   _        - 128/256/512-bit type-info bundle; info128 and info512 are used.
//   Mask     - dag matched as the masked_load mask operand; expected to bind
//              $mask, which the outputs reference as MaskRC:$mask.
//   MaskRC   - register class of $mask; the register is copied directly to
//              VK1WM.
4080multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4081                                       dag Mask, RegisterClass MaskRC> {
4082
// Zeroing form: the masked_load pass-through is the all-zeros vector.
4083def : Pat<(_.info128.VT (extract_subvector
4084                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4085                                        _.info512.ImmAllZerosV)),
4086                           (iPTR 0))),
4087          (!cast<Instruction>(InstrStr#rmkz)
4088                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4089                      addr:$srcAddr)>;
4090
// Merge form: the pass-through is X86vzmovl of a 128-bit register inserted at
// index 0 of undef; that register becomes the first (tied $src0) operand of
// rmk.
4091def : Pat<(_.info128.VT (extract_subvector
4092                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4093                      (_.info512.VT (insert_subvector undef,
4094                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4095                            (iPTR 0))))),
4096                (iPTR 0))),
4097          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4098                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4099                      addr:$srcAddr)>;
4100
4101}
4102
// Same two source patterns as avx512_load_scalar_lowering (zeroing and merge
// forms of a 512-bit masked_load narrowed to its low 128 bits), but the mask
// register is first inserted into an undefined i32 with INSERT_SUBREG at
// `subreg` before being copied to VK1WM.
//   InstrStr - record-name prefix; "rmkz" / "rmk" is appended.
//   Mask     - dag matched as the mask operand; expected to bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted into the i32.
4103multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4104                                              AVX512VLVectorVTInfo _,
4105                                              dag Mask, RegisterClass MaskRC,
4106                                              SubRegIndex subreg> {
4107
// Zeroing form: all-zeros pass-through selects rmkz.
4108def : Pat<(_.info128.VT (extract_subvector
4109                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4110                                        _.info512.ImmAllZerosV)),
4111                           (iPTR 0))),
4112          (!cast<Instruction>(InstrStr#rmkz)
4113                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4114                      addr:$srcAddr)>;
4115
// Merge form: X86vzmovl-of-register pass-through selects rmk with that
// register as its first operand.
4116def : Pat<(_.info128.VT (extract_subvector
4117                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4118                      (_.info512.VT (insert_subvector undef,
4119                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4120                            (iPTR 0))))),
4121                (iPTR 0))),
4122          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4123                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4124                      addr:$srcAddr)>;
4125
4126}
4127
4128// This matches the more recent codegen from clang that avoids emitting a 512
4129// bit masked load directly. Codegen will widen 128-bit masked load to 512
4130// bits on AVX512F only targets.
// Four patterns are emitted: a zeroing (rmkz) and a merge (rmk) form for each
// of the two mask shapes.
//   Mask512 - mask dag for the 512-bit masked_load narrowed by
//             extract_subvector at index 0.
//   Mask128 - mask dag for the direct 128-bit masked_load.
// In all four, MaskRC:$mask is inserted into an undefined i32 at `subreg` and
// copied to VK1WM, so both mask dags are expected to bind $mask.
4131multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4132                                              AVX512VLVectorVTInfo _,
4133                                              dag Mask512, dag Mask128,
4134                                              RegisterClass MaskRC,
4135                                              SubRegIndex subreg> {
4136// AVX512F patterns.
4137def : Pat<(_.info128.VT (extract_subvector
4138                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4139                                        _.info512.ImmAllZerosV)),
4140                           (iPTR 0))),
4141          (!cast<Instruction>(InstrStr#rmkz)
4142                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4143                      addr:$srcAddr)>;
4144
4145def : Pat<(_.info128.VT (extract_subvector
4146                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4147                      (_.info512.VT (insert_subvector undef,
4148                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4149                            (iPTR 0))))),
4150                (iPTR 0))),
4151          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4152                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4153                      addr:$srcAddr)>;
4154
4155// AVX512VL patterns.
4156def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4157                         _.info128.ImmAllZerosV)),
4158          (!cast<Instruction>(InstrStr#rmkz)
4159                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4160                      addr:$srcAddr)>;
4161
4162def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4163                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4164          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4165                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4166                      addr:$srcAddr)>;
4167}
4168
// Emit the select-folding move patterns for both scalar types: vmovss over
// v4f32 with fp32imm0 as the zero leaf, and vmovsd over v2f64 with fp64imm0.
4169defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4170defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4171
// Masked scalar store patterns, one instantiation per mask shape. Every mask
// dag ANDs a general-purpose register $mask with the constant 1 before
// converting it to an i1 vector.

// VMOVSS, GR32 mask: and with 1, truncate to i16, bitconvert to v16i1.
4172defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4173                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
// VMOVSS, GR16 mask (inserted at sub_16bit): and with 1, bitconvert to v16i1.
4174defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4175                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
// VMOVSD, GR8 mask (inserted at sub_8bit): and with 1, bitconvert to v8i1.
4176defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4177                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4178
// VMOVSS, GR8 mask. Mask128 is the low v4i1 of the v8i1 bitconvert; Mask512
// is that same v4i1 inserted at index 0 of an all-zeros v16i1.
4179defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4180                   (v16i1 (insert_subvector
4181                           (v16i1 immAllZerosV),
4182                           (v4i1 (extract_subvector
4183                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4184                                  (iPTR 0))),
4185                           (iPTR 0))),
4186                   (v4i1 (extract_subvector
4187                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4188                          (iPTR 0))), GR8, sub_8bit>;
// VMOVSD, GR8 mask. Mask128 is the low v2i1 of the v8i1 bitconvert; Mask512
// is that v2i1 inserted at index 0 of an all-zeros v16i1 and then narrowed
// back to v8i1 by extract_subvector at index 0.
4189defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4190                   (v8i1
4191                    (extract_subvector
4192                     (v16i1
4193                      (insert_subvector
4194                       (v16i1 immAllZerosV),
4195                       (v2i1 (extract_subvector
4196                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4197                              (iPTR 0))),
4198                       (iPTR 0))),
4199                     (iPTR 0))),
4200                   (v2i1 (extract_subvector
4201                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202                          (iPTR 0))), GR8, sub_8bit>;
4203
// Masked scalar load patterns, one instantiation per mask shape. The mask
// dags are the same ones used for the masked scalar store instantiations:
// each ANDs a general-purpose register $mask with the constant 1 before
// converting it to an i1 vector.

// VMOVSS, GR32 mask: and with 1, truncate to i16, bitconvert to v16i1.
4204defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4205                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
// VMOVSS, GR16 mask (inserted at sub_16bit): and with 1, bitconvert to v16i1.
4206defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4207                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
// VMOVSD, GR8 mask (inserted at sub_8bit): and with 1, bitconvert to v8i1.
4208defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4209                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4210
// VMOVSS, GR8 mask. Mask128 is the low v4i1 of the v8i1 bitconvert; Mask512
// is that same v4i1 inserted at index 0 of an all-zeros v16i1.
4211defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4212                   (v16i1 (insert_subvector
4213                           (v16i1 immAllZerosV),
4214                           (v4i1 (extract_subvector
4215                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4216                                  (iPTR 0))),
4217                           (iPTR 0))),
4218                   (v4i1 (extract_subvector
4219                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4220                          (iPTR 0))), GR8, sub_8bit>;
// VMOVSD, GR8 mask. Mask128 is the low v2i1 of the v8i1 bitconvert; Mask512
// is that v2i1 inserted at index 0 of an all-zeros v16i1 and then narrowed
// back to v8i1 by extract_subvector at index 0.
4221defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4222                   (v8i1
4223                    (extract_subvector
4224                     (v16i1
4225                      (insert_subvector
4226                       (v16i1 immAllZerosV),
4227                       (v2i1 (extract_subvector
4228                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4229                              (iPTR 0))),
4230                       (iPTR 0))),
4231                     (iPTR 0))),
4232                   (v2i1 (extract_subvector
4233                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234                          (iPTR 0))), GR8, sub_8bit>;
4235
// Scalar X86selects on f32/f64 values, implemented with the masked scalar
// moves. Scalar registers are copied into VR128X for the instruction and the
// vector result is copied back to FR32X / FR64X.

// f32, register/register: $src2 is passed first, then the mask, an
// IMPLICIT_DEF vector, and $src1.
4236def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4237          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4238           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4239           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4240           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4241
// f32, register vs. fp32imm0: the zeroing rrkz form, no pass-through operand.
4242def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4243          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4244           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4245
// f32, loaded value vs. register: the load is folded into rmk, with $src0 as
// the first operand.
4246def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4247          (COPY_TO_REGCLASS
4248           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4249                                                       VK1WM:$mask, addr:$src)),
4250           FR32X)>;
// f32, loaded value vs. fp32imm0: the load is folded into rmkz.
4251def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4252          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4253
// f64 counterparts of the four f32 patterns above, using VMOVSDZ*, v2f64,
// FR64X, loadf64 and fp64imm0.
4254def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4255          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4256           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4257           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4258           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4259
4260def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4261          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4262           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4263
4264def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4265          (COPY_TO_REGCLASS
4266           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4267                                                       VK1WM:$mask, addr:$src)),
4268           FR64X)>;
4269def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4270          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4271
4272
// X86selects on whole v4f32 / v2f64 registers. Merge form: $src2 is passed
// first, then the mask, and $src1 is supplied for both remaining source
// operands of the rrk move.
4273def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4274          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4275def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4276          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4277
// Zeroing form: the third X86selects operand is the all-zeros vector, so the
// rrkz move is used with $src1 as both source operands.
4278def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4279          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4280def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4281          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4282
// Register-to-register vmovss / vmovsd records that use opcode 0x11 with the
// MRMDestReg form. They carry no selection patterns ([]) and are marked
// isCodeGenOnly with ForceDisassemble. Each names its counterpart record
// (without the _REV suffix) through FoldGenData.
// NOTE(review): these look like the alternate ("reversed") encodings of the
// rr/rrk/rrkz moves; the exact effect of FoldGenData is defined outside this
// excerpt - confirm there.
4283let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // vmovss, unmasked.
4284  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4285                           (ins VR128X:$src1, VR128X:$src2),
4286                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4287                           []>, XS, EVEX_4V, VEX_LIG,
4288                           FoldGenData<"VMOVSSZrr">,
4289                           Sched<[SchedWriteFShuffle.XMM]>;
4290
  // vmovss, merge-masked: $src0 is tied to $dst.
4291  let Constraints = "$src0 = $dst" in
4292  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4293                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4294                                                   VR128X:$src1, VR128X:$src2),
4295                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4296                                        "$dst {${mask}}, $src1, $src2}",
4297                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4298                             FoldGenData<"VMOVSSZrrk">,
4299                             Sched<[SchedWriteFShuffle.XMM]>;
4300
  // vmovss, zero-masked ({z} in the asm string, EVEX_KZ).
4301  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4302                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4303                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4304                                    "$dst {${mask}} {z}, $src1, $src2}",
4305                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4306                         FoldGenData<"VMOVSSZrrkz">,
4307                         Sched<[SchedWriteFShuffle.XMM]>;
4308
  // vmovsd, unmasked. The vmovsd records differ from the vmovss ones by using
  // XD instead of XS and adding VEX_W.
4309  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4310                           (ins VR128X:$src1, VR128X:$src2),
4311                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4312                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4313                           FoldGenData<"VMOVSDZrr">,
4314                           Sched<[SchedWriteFShuffle.XMM]>;
4315
  // vmovsd, merge-masked: $src0 is tied to $dst.
4316  let Constraints = "$src0 = $dst" in
4317  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4318                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4319                                                   VR128X:$src1, VR128X:$src2),
4320                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4321                                        "$dst {${mask}}, $src1, $src2}",
4322                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4323                             VEX_W, FoldGenData<"VMOVSDZrrk">,
4324                             Sched<[SchedWriteFShuffle.XMM]>;
4325
  // vmovsd, zero-masked.
4326  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4327                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4328                                                          VR128X:$src2),
4329                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4330                                         "$dst {${mask}} {z}, $src1, $src2}",
4331                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4332                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
4333                              Sched<[SchedWriteFShuffle.XMM]>;
4334}
4335
// Assembler aliases: the "vmovss.s" / "vmovsd.s" mnemonics (unmasked,
// merge-masked and zero-masked) map onto the corresponding *_REV records.
// NOTE(review): the trailing 0 argument is presumably InstAlias's emit flag
// (alias accepted by the parser but not used when printing) - confirm against
// the InstAlias class definition.
4336def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4337                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4338def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4339                             "$dst {${mask}}, $src1, $src2}",
4340                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4341                                VR128X:$src1, VR128X:$src2), 0>;
4342def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4343                             "$dst {${mask}} {z}, $src1, $src2}",
4344                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4345                                 VR128X:$src1, VR128X:$src2), 0>;
4346def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4347                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4348def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4349                             "$dst {${mask}}, $src1, $src2}",
4350                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4351                                VR128X:$src1, VR128X:$src2), 0>;
4352def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4353                             "$dst {${mask}} {z}, $src1, $src2}",
4354                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4355                                 VR128X:$src1, VR128X:$src2), 0>;
4356
// When optimizing for size, select X86vzmovl on 32-bit-element vectors as
// VMOVSSZrr with a zeroed register (AVX512_128_SET0) as the first source and
// the input as the second.
4357let Predicates = [HasAVX512, OptForSize] in {
  // 128-bit: operate on the register directly.
4358  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4359            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4360  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4361            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4362
4363  // Move low f32 and clear high bits.
  // 256-bit: extract the low xmm sub-register, do the 128-bit move, and
  // wrap the result back to the wide type with SUBREG_TO_REG.
4364  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4365            (SUBREG_TO_REG (i32 0),
4366             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4367              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4368  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4369            (SUBREG_TO_REG (i32 0),
4370             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4371              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4372
  // 512-bit: same scheme as the 256-bit patterns, on VR512.
4373  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4374            (SUBREG_TO_REG (i32 0),
4375             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4376              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4377  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4378            (SUBREG_TO_REG (i32 0),
4379             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4380              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4381}
4382
4383// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4384// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl forms are listed in this group. Each blends a
// zeroed register (V_SET0) with the low xmm sub-register of the source and
// wraps the result back to 512 bits with SUBREG_TO_REG.
// NOTE(review): the immediates (1 for VBLENDPSrri, 3 for VPBLENDWrri)
// presumably both select the low 32 bits from the source - confirm against
// the blend instruction definitions.
4385let Predicates = [HasAVX512, OptForSpeed] in {
4386  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4387            (SUBREG_TO_REG (i32 0),
4388             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4389                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4390                          (i8 1))), sub_xmm)>;
4391  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4392            (SUBREG_TO_REG (i32 0),
4393             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4394                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4395                          (i8 3))), sub_xmm)>;
4396}
4397
// Scalar floating-point loads into vector registers, selected as the
// VMOVSSZrm / VMOVSDZrm load forms.
4398let Predicates = [HasAVX512] in {
  // scalar_to_vector of a loaded f32 / f64 becomes the load instruction
  // directly.
4399  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4400            (VMOVSSZrm addr:$src)>;
4401  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4402            (VMOVSDZrm addr:$src)>;
4403
4404  // Represent the same patterns above but in the form they appear for
4405  // 256-bit types
  // The 128-bit load result is wrapped to the wide type with SUBREG_TO_REG.
4406  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4407            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4408  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4409            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4410
4411  // Represent the same patterns above but in the form they appear for
4412  // 512-bit types
4413  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4414            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4415  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4416            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4417}
4418
// Register-to-register "vmovq" (opcode 0x7E, MRMSrcReg) whose selection
// pattern is X86vzmovl on v2i64. It is placed in the SSEPackedInt execution
// domain and scheduled as SchedWriteVecLogic.XMM.
4419let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4420def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4421                                (ins VR128X:$src),
4422                                "vmovq\t{$src, $dst|$dst, $src}",
4423                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4424                                                   (v2i64 VR128X:$src))))]>,
4425                                EVEX, VEX_W;
4426}
4427
// Zero-extending scalar moves into vector registers, selected as the EVEX
// movd/movq instructions.
4428let Predicates = [HasAVX512] in {
  // X86vzmovl of a GPR placed in a vector becomes a GPR-to-vector move.
4429  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4430            (VMOVDI2PDIZrr GR32:$src)>;
4431
4432  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4433            (VMOV64toPQIZrr GR64:$src)>;
4434
4435  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // Hence the 256-bit zero-extending loads below are the 128-bit load wrapped
  // with SUBREG_TO_REG.
4436  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4437            (VMOVDI2PDIZrm addr:$src)>;
4438  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4439            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  // v2f64 X86vzmovl reuses the integer vmovq register move.
4440  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4441            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4442  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4443            (VMOVQI2PQIZrm addr:$src)>;
4444  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4445            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4446
4447  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4448  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4449            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4450  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4451            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4452
  // 256-bit X86vzmovl on 64-bit elements: extract the low xmm, apply the
  // 128-bit vmovq register move, and wrap back with SUBREG_TO_REG.
4453  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4454            (SUBREG_TO_REG (i32 0),
4455             (v2f64 (VMOVZPQILo2PQIZrr
4456                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4457             sub_xmm)>;
4458  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4459            (SUBREG_TO_REG (i32 0),
4460             (v2i64 (VMOVZPQILo2PQIZrr
4461                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4462             sub_xmm)>;
4463
  // 512-bit X86vzmovl on 64-bit elements: same scheme on VR512.
4464  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4465            (SUBREG_TO_REG (i32 0),
4466             (v2f64 (VMOVZPQILo2PQIZrr
4467                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4468             sub_xmm)>;
4469  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4470            (SUBREG_TO_REG (i32 0),
4471             (v2i64 (VMOVZPQILo2PQIZrr
4472                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4473             sub_xmm)>;
4474}
4475
4476//===----------------------------------------------------------------------===//
4477// AVX-512 - Non-temporals
4478//===----------------------------------------------------------------------===//
4479
// 512-bit "vmovntdqa" load (opcode 0x2A, MRMSrcMem) from an i512mem operand
// into VR512. The record itself has an empty pattern list; the
// alignednontemporalload selection patterns that use it are written
// separately as Pat records.
4480def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4481                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4482                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4483                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4484
// 256-bit and 128-bit "vmovntdqa" loads, available under HasVLX. They mirror
// the 512-bit record, differing in memory operand, register class, scheduling
// class (YMM / XMM) and vector-length modifier (EVEX_V256 / EVEX_V128). Both
// have empty pattern lists.
4485let Predicates = [HasVLX] in {
4486  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4487                       (ins i256mem:$src),
4488                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4489                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4490                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4491
4492  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4493                      (ins i128mem:$src),
4494                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4495                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4496                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4497}
4498
// Defines a single store record "mr" for one vector width.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand string.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   Sched     - scheduling bundle; its MR member is used as SchedRW.
//   st_frag   - store fragment used in the selection pattern; defaults to
//               alignednontemporalstore.
// The record is given AddedComplexity = 400.
4499multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4500                        X86SchedWriteMoveLS Sched,
4501                        PatFrag st_frag = alignednontemporalstore> {
4502  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4503  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4504                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4505                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4506                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4507}
4508
// Instantiates avx512_movnt at all three vector widths:
//   Z    - 512-bit (VTInfo.info512, Sched.ZMM), requires HasAVX512.
//   Z256 - 256-bit (VTInfo.info256, Sched.YMM), requires HasAVX512 + HasVLX.
//   Z128 - 128-bit (VTInfo.info128, Sched.XMM), requires HasAVX512 + HasVLX.
// The default st_frag of avx512_movnt is used for every width.
4509multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4510                           AVX512VLVectorVTInfo VTInfo,
4511                           X86SchedWriteMoveLSWidths Sched> {
4512  let Predicates = [HasAVX512] in
4513    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4514
4515  let Predicates = [HasAVX512, HasVLX] in {
4516    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4517    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4518  }
4519}
4520
// Store records for vmovntdq (i64 element info, opcode 0xE7), vmovntpd (f64,
// 0x2B, with VEX_W) and vmovntps (f32, 0x2B). Each expands to the Zmr,
// Z256mr and Z128mr records, e.g. VMOVNTDQZmr and VMOVNTDQZ256mr.
4521defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4522                                SchedWriteVecMoveLSNT>, PD;
4523defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4524                                SchedWriteFMoveLSNT>, PD, VEX_W;
4525defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4526                                SchedWriteFMoveLSNT>, PS;
4527
// 512-bit aligned non-temporal store and load patterns for the remaining
// vector types, all with AddedComplexity = 400.
4528let Predicates = [HasAVX512], AddedComplexity = 400 in {
  // Stores of v16i32 / v32i16 / v64i8 all use VMOVNTDQZmr, whose own pattern
  // is instantiated from the i64 element info.
4529  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4530            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4531  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4532            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4533  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4534            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4535
  // Loads of every 512-bit vector type, floating-point and integer alike,
  // use VMOVNTDQAZrm.
4536  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4537            (VMOVNTDQAZrm addr:$src)>;
4538  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4539            (VMOVNTDQAZrm addr:$src)>;
4540  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4541            (VMOVNTDQAZrm addr:$src)>;
4542  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4543            (VMOVNTDQAZrm addr:$src)>;
4544  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4545            (VMOVNTDQAZrm addr:$src)>;
4546  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4547            (VMOVNTDQAZrm addr:$src)>;
4548}
4549
// 256-bit and 128-bit counterparts of the 512-bit aligned non-temporal
// store/load patterns, under HasVLX and with AddedComplexity = 400.
4550let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores of v8i32 / v16i16 / v32i8 use VMOVNTDQZ256mr.
4551  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4552            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4553  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4554            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4555  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4556            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4557
  // 256-bit loads of every vector type use VMOVNTDQAZ256rm.
4558  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4559            (VMOVNTDQAZ256rm addr:$src)>;
4560  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4561            (VMOVNTDQAZ256rm addr:$src)>;
4562  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4563            (VMOVNTDQAZ256rm addr:$src)>;
4564  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4565            (VMOVNTDQAZ256rm addr:$src)>;
4566  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4567            (VMOVNTDQAZ256rm addr:$src)>;
4568  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4569            (VMOVNTDQAZ256rm addr:$src)>;
4570
  // 128-bit stores of v4i32 / v8i16 / v16i8 use VMOVNTDQZ128mr.
4571  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4572            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4573  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4574            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4575  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4576            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4577
  // 128-bit loads of every vector type use VMOVNTDQAZ128rm.
4578  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4579            (VMOVNTDQAZ128rm addr:$src)>;
4580  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4581            (VMOVNTDQAZ128rm addr:$src)>;
4582  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4583            (VMOVNTDQAZ128rm addr:$src)>;
4584  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4585            (VMOVNTDQAZ128rm addr:$src)>;
4586  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4587            (VMOVNTDQAZ128rm addr:$src)>;
4588  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4589            (VMOVNTDQAZ128rm addr:$src)>;
4590}
4591
4592//===----------------------------------------------------------------------===//
4593// AVX-512 - Integer arithmetic
4594//
// Two-source integer vector operation for a single vector type `_`.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - DAG node matched by the selection pattern.
//   _            - X86VectorVTInfo supplying register class, memory operand
//                  and load fragment.
//   sched        - scheduling class; the memory form uses sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - forwarded (twice) to AVX512_maskable for the rr form only.
// NOTE(review): AVX512_maskable is defined outside this chunk; it presumably
// also emits the masked (k / kz) variants of each form -- confirm.
4595multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4596                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4597                           bit IsCommutable = 0> {
  // Register-register form: both sources come from _.RC.
4598  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4599                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4600                    "$src2, $src1", "$src1, $src2",
4601                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4602                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4603                    Sched<[sched]>;
4604
  // Register-memory form: second source is loaded through _.LdFrag.
4605  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4606                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4607                  "$src2, $src1", "$src1, $src2",
4608                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4609                  AVX512BIBase, EVEX_4V,
4610                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4611}
4612
// Extends avx512_binop_rm (rr and rm forms) with a broadcast-memory form.
// The rmb form takes a scalar memory operand (_.ScalarMemOp), matches it via
// _.BroadcastLdFrag, appends _.BroadcastStr to the memory operand in both
// assembly strings, and adds EVEX_B.
4613multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4614                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4615                            bit IsCommutable = 0> :
4616           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4617  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4618                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4619                  "${src2}"#_.BroadcastStr#", $src1",
4620                  "$src1, ${src2}"#_.BroadcastStr,
4621                  (_.VT (OpNode _.RC:$src1,
4622                                (_.BroadcastLdFrag addr:$src2)))>,
4623                  AVX512BIBase, EVEX_4V, EVEX_B,
4624                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4625}
4626
// Instantiates avx512_binop_rm (no broadcast form) at three vector widths.
//   Z    - VTInfo.info512, sched.ZMM, EVEX_V512; requires only `prd`.
//   Z256 - VTInfo.info256, sched.YMM, EVEX_V256; requires `prd` and HasVLX.
//   Z128 - VTInfo.info128, sched.XMM, EVEX_V128; requires `prd` and HasVLX.
4627multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4628                              AVX512VLVectorVTInfo VTInfo,
4629                              X86SchedWriteWidths sched, Predicate prd,
4630                              bit IsCommutable = 0> {
4631  let Predicates = [prd] in
4632    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4633                             IsCommutable>, EVEX_V512;
4634
4635  let Predicates = [prd, HasVLX] in {
4636    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4637                                sched.YMM, IsCommutable>, EVEX_V256;
4638    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4639                                sched.XMM, IsCommutable>, EVEX_V128;
4640  }
4641}
4642
// Same width fan-out as avx512_binop_rm_vl, but built on avx512_binop_rmb so
// each width also gets the broadcast-memory (rmb) form.
//   Z    - requires only `prd`.
//   Z256 / Z128 - require `prd` and HasVLX.
4643multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4644                               AVX512VLVectorVTInfo VTInfo,
4645                               X86SchedWriteWidths sched, Predicate prd,
4646                               bit IsCommutable = 0> {
4647  let Predicates = [prd] in
4648    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4649                             IsCommutable>, EVEX_V512;
4650
4651  let Predicates = [prd, HasVLX] in {
4652    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4653                                 sched.YMM, IsCommutable>, EVEX_V256;
4654    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4655                                 sched.XMM, IsCommutable>, EVEX_V128;
4656  }
4657}
4658
// 64-bit element wrapper: avx512_binop_rmb_vl over avx512vl_i64_info, with
// VEX_W and EVEX_CD8<64, CD8VF>. Includes the broadcast-memory form.
4659multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4660                                X86SchedWriteWidths sched, Predicate prd,
4661                                bit IsCommutable = 0> {
4662  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4663                                  sched, prd, IsCommutable>,
4664                                  VEX_W, EVEX_CD8<64, CD8VF>;
4665}
4666
// 32-bit element wrapper: avx512_binop_rmb_vl over avx512vl_i32_info, with
// EVEX_CD8<32, CD8VF>. Includes the broadcast-memory form; unlike the _q
// wrapper it does not add VEX_W.
4667multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668                                X86SchedWriteWidths sched, Predicate prd,
4669                                bit IsCommutable = 0> {
4670  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4671                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4672}
4673
// 16-bit element wrapper: avx512_binop_rm_vl over avx512vl_i16_info, with
// EVEX_CD8<16, CD8VF> and VEX_WIG. Built on the non-broadcast multiclass, so
// no rmb form is produced.
4674multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4675                                X86SchedWriteWidths sched, Predicate prd,
4676                                bit IsCommutable = 0> {
4677  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4678                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4679                                 VEX_WIG;
4680}
4681
// 8-bit element wrapper: avx512_binop_rm_vl over avx512vl_i8_info, with
// EVEX_CD8<8, CD8VF> and VEX_WIG. Built on the non-broadcast multiclass, so
// no rmb form is produced.
4682multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683                                X86SchedWriteWidths sched, Predicate prd,
4684                                bit IsCommutable = 0> {
4685  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4686                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4687                                 VEX_WIG;
4688}
4689
// Defines the dword (D) and qword (Q) variants of one operation.
// Each variant gets its own opcode (opc_d / opc_q) and has "d" / "q" appended
// to the mnemonic; OpNode, sched, prd and IsCommutable are shared.
4690multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4691                                 SDNode OpNode, X86SchedWriteWidths sched,
4692                                 Predicate prd, bit IsCommutable = 0> {
4693  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4694                                   IsCommutable>;
4695
4696  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4697                                   IsCommutable>;
4698}
4699
// Defines the word (W) and byte (B) variants of one operation.
// Each variant gets its own opcode (opc_w / opc_b) and has "w" / "b" appended
// to the mnemonic; OpNode, sched, prd and IsCommutable are shared.
4700multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4701                                 SDNode OpNode, X86SchedWriteWidths sched,
4702                                 Predicate prd, bit IsCommutable = 0> {
4703  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4704                                   IsCommutable>;
4705
4706  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4707                                   IsCommutable>;
4708}
4709
// Defines all four element widths of one operation.
// The D/Q variants are predicated on HasAVX512 and the B/W variants on HasBWI.
// Opcode arguments are given in b, w, d, q order.
4710multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4711                                  bits<8> opc_d, bits<8> opc_q,
4712                                  string OpcodeStr, SDNode OpNode,
4713                                  X86SchedWriteWidths sched,
4714                                  bit IsCommutable = 0> {
4715  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4716                                    sched, HasAVX512, IsCommutable>,
4717              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4718                                    sched, HasBWI, IsCommutable>;
4719}
4720
// Two-source operation whose source type (_Src), destination type (_Dst) and
// broadcast element type (_Brdct) may all differ.
//   rr  - both sources in _Src.RC; IsCommutable is passed once.
//   rm  - second source loaded through _Src.LdFrag.
//   rmb - second source is a _Brdct.ScalarMemOp broadcast load typed as
//         _Brdct.VT and wrapped in bitconvert; adds EVEX_B.
// Note the parameter order differs from avx512_binop_rm: sched precedes OpNode.
4721multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4722                            X86FoldableSchedWrite sched,
4723                            SDNode OpNode,X86VectorVTInfo _Src,
4724                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4725                            bit IsCommutable = 0> {
4726  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4727                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4728                            "$src2, $src1","$src1, $src2",
4729                            (_Dst.VT (OpNode
4730                                         (_Src.VT _Src.RC:$src1),
4731                                         (_Src.VT _Src.RC:$src2))),
4732                            IsCommutable>,
4733                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
4734  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4735                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4736                        "$src2, $src1", "$src1, $src2",
4737                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4738                                      (_Src.LdFrag addr:$src2)))>,
4739                        AVX512BIBase, EVEX_4V,
4740                        Sched<[sched.Folded, sched.ReadAfterFold]>;
4741
4742  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4743                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4744                    OpcodeStr,
4745                    "${src2}"#_Brdct.BroadcastStr#", $src1",
4746                     "$src1, ${src2}"#_Brdct.BroadcastStr,
4747                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4748                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4749                    AVX512BIBase, EVEX_4V, EVEX_B,
4750                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4751}
4752
// Integer add, subtract, saturating add/subtract, multiply, multiply-high and
// average instruction definitions. The final 1/0 argument of each defm is
// IsCommutable: the subtract forms pass 0, everything else passes 1.
4753defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4754                                    SchedWriteVecALU, 1>;
4755defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4756                                    SchedWriteVecALU, 0>;
4757defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4758                                    SchedWriteVecALU, HasBWI, 1>;
4759defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4760                                    SchedWriteVecALU, HasBWI, 0>;
4761defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4762                                     SchedWriteVecALU, HasBWI, 1>;
4763defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4764                                     SchedWriteVecALU, HasBWI, 0>;
// Low-half multiplies: the d, w and q forms are predicated on HasAVX512,
// HasBWI and HasDQI respectively.
4765defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4766                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
4767defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4768                                    SchedWriteVecIMul, HasBWI, 1>;
4769defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4770                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
4771                                    NotEVEX2VEXConvertible;
4772defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4773                                    HasBWI, 1>;
4774defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4775                                     HasBWI, 1>;
4776defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4777                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
4778defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4779                                   SchedWriteVecALU, HasBWI, 1>;
4780defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4781                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4782defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4783                                     SchedWriteVecIMul, HasAVX512, 1>;
4784
4784
// Instantiates avx512_binop_rm2 at 512/256/128 bits with independent source
// and destination type families. The broadcast type is always the 64-bit
// element info for that width (v8i64_info / v4i64x_info / v2i64x_info), and
// every width adds EVEX_CD8<64, CD8VF> and VEX_W.
//   Z           - requires `prd`.
//   Z256 / Z128 - require HasVLX and `prd`.
4785multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4786                            X86SchedWriteWidths sched,
4787                            AVX512VLVectorVTInfo _SrcVTInfo,
4788                            AVX512VLVectorVTInfo _DstVTInfo,
4789                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4790  let Predicates = [prd] in
4791    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4792                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4793                                 v8i64_info, IsCommutable>,
4794                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4795  let Predicates = [HasVLX, prd] in {
4796    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4797                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4798                                      v4i64x_info, IsCommutable>,
4799                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4800    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4801                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4802                                      v2i64x_info, IsCommutable>,
4803                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4804  }
4805}
4806
// vpmultishiftqb: source and destination both use the i8 type family, while
// avx512_binop_all supplies a 64-bit element broadcast. Requires HasVBMI and
// is not commutable.
4807defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4808                                avx512vl_i8_info, avx512vl_i8_info,
4809                                X86multishift, HasVBMI, 0>, T8PD;
4810
// Broadcast-memory (rmb) form only, for operations whose destination type
// (_Dst) differs from the source type (_Src). The broadcast load is typed as
// _Src.VT and wrapped in bitconvert; the operand strings append
// _Src.BroadcastStr. Adds EVEX_B and EVEX_CD8<_Src.EltSize, CD8VF>.
4811multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4812                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4813                            X86FoldableSchedWrite sched> {
4814  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4815                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4816                    OpcodeStr,
4817                    "${src2}"#_Src.BroadcastStr#", $src1",
4818                     "$src1, ${src2}"#_Src.BroadcastStr,
4819                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4820                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4821                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4822                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4823}
4824
// Register-register (rr) and register-memory (rm) forms for operations whose
// destination type (_Dst) differs from the source type (_Src).
// IsCommutable is forwarded twice to AVX512_maskable for the rr form only.
// Both forms add EVEX_4V and EVEX_CD8<_Src.EltSize, CD8VF>; no instruction
// base class is applied here, so callers supply one at the defm site.
4825multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4826                            SDNode OpNode,X86VectorVTInfo _Src,
4827                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4828                            bit IsCommutable = 0> {
4829  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4830                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4831                            "$src2, $src1","$src1, $src2",
4832                            (_Dst.VT (OpNode
4833                                         (_Src.VT _Src.RC:$src1),
4834                                         (_Src.VT _Src.RC:$src2))),
4835                            IsCommutable, IsCommutable>,
4836                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4837  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4838                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4839                        "$src2, $src1", "$src1, $src2",
4840                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4841                                      (_Src.LdFrag addr:$src2)))>,
4842                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4843                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4844}
4845
// i32 -> i16 operations at 512/256/128 bits. Each width combines
// avx512_packs_rm (rr, rm) with avx512_packs_rmb (rmb) and uses the
// SchedWriteShuffle class for that width.
//   Z           - requires HasBWI.
//   Z256 / Z128 - require HasBWI and HasVLX.
4846multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4847                                    SDNode OpNode> {
4848  let Predicates = [HasBWI] in
4849  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4850                                 v32i16_info, SchedWriteShuffle.ZMM>,
4851                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4852                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4853  let Predicates = [HasBWI, HasVLX] in {
4854    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4855                                     v16i16x_info, SchedWriteShuffle.YMM>,
4856                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4857                                      v16i16x_info, SchedWriteShuffle.YMM>,
4858                                      EVEX_V256;
4859    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4860                                     v8i16x_info, SchedWriteShuffle.XMM>,
4861                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4862                                      v8i16x_info, SchedWriteShuffle.XMM>,
4863                                      EVEX_V128;
4864  }
4865}
// i16 -> i8 operations at 512/256/128 bits. Only avx512_packs_rm is used, so
// there is no broadcast (rmb) form; every width adds VEX_WIG.
//   Z           - requires HasBWI.
//   Z256 / Z128 - require HasBWI and HasVLX.
4866multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4867                            SDNode OpNode> {
4868  let Predicates = [HasBWI] in
4869  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4870                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4871  let Predicates = [HasBWI, HasVLX] in {
4872    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4873                                     v32i8x_info, SchedWriteShuffle.YMM>,
4874                                     EVEX_V256, VEX_WIG;
4875    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4876                                     v16i8x_info, SchedWriteShuffle.XMM>,
4877                                     EVEX_V128, VEX_WIG;
4878  }
4879}
4880
// Multiply-add style operations built on avx512_packs_rm (rr and rm forms,
// no broadcast form) with caller-chosen source/destination type families and
// the SchedWriteVecIMul class for each width.
//   Z           - requires HasBWI.
//   Z256 / Z128 - require HasBWI and HasVLX.
4881multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4882                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4883                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4884  let Predicates = [HasBWI] in
4885  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4886                                _Dst.info512, SchedWriteVecIMul.ZMM,
4887                                IsCommutable>, EVEX_V512;
4888  let Predicates = [HasBWI, HasVLX] in {
4889    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4890                                     _Dst.info256, SchedWriteVecIMul.YMM,
4891                                     IsCommutable>, EVEX_V256;
4892    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4893                                     _Dst.info128, SchedWriteVecIMul.XMM,
4894                                     IsCommutable>, EVEX_V128;
4895  }
4896}
4897
// Pack instructions. VPACKUSDW is the only one using AVX5128IBase; the other
// three use AVX512BIBase.
4898defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4899defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4900defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4901defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4902
// Multiply-add instructions. VPMADDWD passes IsCommutable = 1; VPMADDUBSW
// leaves it at the default of 0.
4903defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4904                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4905defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4906                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4907
// Integer min/max instructions, all commutable (IsCommutable = 1).
// Byte and word forms are predicated on HasBWI, dword and qword forms on
// HasAVX512; the qword forms additionally carry NotEVEX2VEXConvertible.

// Signed maximum.
4908defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4909                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4910defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4911                                    SchedWriteVecALU, HasBWI, 1>;
4912defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4913                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4914defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4915                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4916                                    NotEVEX2VEXConvertible;
4917
// Unsigned maximum.
4918defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4919                                    SchedWriteVecALU, HasBWI, 1>;
4920defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4921                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4922defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4923                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4924defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4925                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4926                                    NotEVEX2VEXConvertible;
4927
// Signed minimum.
4928defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4929                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4930defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4931                                    SchedWriteVecALU, HasBWI, 1>;
4932defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4933                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4934defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4935                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4936                                    NotEVEX2VEXConvertible;
4937
// Unsigned minimum.
4938defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4939                                    SchedWriteVecALU, HasBWI, 1>;
4940defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4941                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
4942defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4943                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
4944defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4945                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
4946                                    NotEVEX2VEXConvertible;
4947
4948// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern places the narrow operand(s) into an undefined v8i64 with
// IMPLICIT_DEF + INSERT_SUBREG, runs the 512-bit VPMULLQZrr / VPMULLQZrmb, and
// extracts the low sub_ymm / sub_xmm part of the result. The rmb patterns
// widen only $src1; the broadcast operand is passed as an address.
4949let Predicates = [HasDQI, NoVLX] in {
  // v4i64 (256-bit) multiply.
4950  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4951            (EXTRACT_SUBREG
4952                (VPMULLQZrr
4953                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4954                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4955             sub_ymm)>;
4956  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4957            (EXTRACT_SUBREG
4958                (VPMULLQZrmb
4959                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4960                    addr:$src2),
4961             sub_ymm)>;
4962
  // v2i64 (128-bit) multiply.
4963  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4964            (EXTRACT_SUBREG
4965                (VPMULLQZrr
4966                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4967                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4968             sub_xmm)>;
4969  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4970            (EXTRACT_SUBREG
4971                (VPMULLQZrmb
4972                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4973                    addr:$src2),
4974             sub_xmm)>;
4975}
4976
// Lowers v4i64 / v2i64 `OpNode` through the 512-bit instruction named by
// `Instr` (the "rr" or "rmb" suffix is appended and resolved with !cast).
// Narrow operands are inserted into an undefined v8i64, the wide instruction
// is applied, and the low sub_ymm / sub_xmm part is extracted.
// No predicates are set here; the instantiation site supplies them.
4977multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit: register-register, then register-broadcast.
4978  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4979            (EXTRACT_SUBREG
4980                (!cast<Instruction>(Instr#"rr")
4981                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4982                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4983             sub_ymm)>;
4984  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4985            (EXTRACT_SUBREG
4986                (!cast<Instruction>(Instr#"rmb")
4987                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4988                    addr:$src2),
4989             sub_ymm)>;
4990
  // 128-bit: register-register, then register-broadcast.
4991  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4992            (EXTRACT_SUBREG
4993                (!cast<Instruction>(Instr#"rr")
4994                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4995                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4996             sub_xmm)>;
4997  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4998            (EXTRACT_SUBREG
4999                (!cast<Instruction>(Instr#"rmb")
5000                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5001                    addr:$src2),
5002             sub_xmm)>;
5003}
5004
// Without VLX, implement 128/256-bit 64-bit-element min/max via the 512-bit
// instructions, one avx512_min_max_lowering instantiation per operation.
5005let Predicates = [HasAVX512, NoVLX] in {
5006  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5007  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5008  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5009  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5010}
5011
5012//===----------------------------------------------------------------------===//
5013// AVX-512  Logical Instructions
5014//===----------------------------------------------------------------------===//
5015
// Bitwise logic instructions in dword and qword forms; both forms share one
// opcode value. AND, OR and XOR pass IsCommutable = 1; VPANDN omits the
// argument and so takes the default of 0.
5015defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5016                                   SchedWriteVecLogic, HasAVX512, 1>;
5017defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5018                                  SchedWriteVecLogic, HasAVX512, 1>;
5019defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5020                                   SchedWriteVecLogic, HasAVX512, 1>;
5021defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5022                                    SchedWriteVecLogic, HasAVX512>;
5024
// Byte and word vector logic operations at 128 and 256 bits are selected to
// the qword (Q) instruction forms. Covers and/or/xor/X86andnp in both
// register-register and register-memory shapes; requires HasVLX.
5024let Predicates = [HasVLX] in {
  // 128-bit register-register.
5025  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5026            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5027  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5028            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5029
5030  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5031            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5032  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5033            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5034
5035  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5036            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5037  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5038            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5039
5040  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5041            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5042  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5043            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5044
  // 128-bit register-memory.
5045  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5046            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5047  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5048            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5049
5050  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5051            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5052  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5053            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5054
5055  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5056            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5057  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5058            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5059
5060  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5061            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5062  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5063            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5064
  // 256-bit register-register.
5065  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5066            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5067  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5068            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5069
5070  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5071            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5072  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5073            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5074
5075  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5076            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5077  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5078            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5079
5080  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5081            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5082  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5083            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5084
  // 256-bit register-memory.
5085  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5086            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5087  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5088            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5089
5090  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5091            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5092  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5093            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5094
5095  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5096            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5097  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5098            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5099
5100  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5101            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5102  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5103            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5104}
5106
// 512-bit counterpart of the byte/word logic patterns: v64i8 and v32i16
// and/or/xor/X86andnp are selected to the qword (Q) Z instruction forms.
// Requires only HasAVX512.
5106let Predicates = [HasAVX512] in {
  // Register-register.
5107  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5108            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5109  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5110            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5111
5112  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5113            (VPORQZrr VR512:$src1, VR512:$src2)>;
5114  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5115            (VPORQZrr VR512:$src1, VR512:$src2)>;
5116
5117  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5118            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5119  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5120            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5121
5122  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5123            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5124  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5125            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5126
  // Register-memory.
5127  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5128            (VPANDQZrm VR512:$src1, addr:$src2)>;
5129  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5130            (VPANDQZrm VR512:$src1, addr:$src2)>;
5131
5132  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5133            (VPORQZrm VR512:$src1, addr:$src2)>;
5134  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5135            (VPORQZrm VR512:$src1, addr:$src2)>;
5136
5137  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5138            (VPXORQZrm VR512:$src1, addr:$src2)>;
5139  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5140            (VPXORQZrm VR512:$src1, addr:$src2)>;
5141
5142  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5143            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5144  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5145            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5146}
5148
5149// Patterns to catch vselect with different type than logic op.
// Masked-select patterns for a logic op computed in IntInfo.VT and then
// bitconverted to the select type _.VT.
//   InstrStr - instruction name prefix; rrk / rrkz / rmk / rmkz is appended
//              and resolved with !cast<Instruction>.
//   OpNode   - the logic DAG node.
//   _        - type info of the vselect_mask result (mask class, RC, VT).
//   IntInfo  - type info the logic operation itself is performed in.
// Patterns whose false operand is $src0 select the k (merge) instruction;
// patterns whose false operand is _.ImmAllZerosV select the kz instruction.
5150multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5151                                    X86VectorVTInfo _,
5152                                    X86VectorVTInfo IntInfo> {
5153  // Masked register-register logical operations.
5154  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5155                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5156                   _.RC:$src0)),
5157            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5158             _.RC:$src1, _.RC:$src2)>;
5159
5160  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5161                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5162                   _.ImmAllZerosV)),
5163            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5164             _.RC:$src2)>;
5165
5166  // Masked register-memory logical operations.
5167  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5168                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5169                                            (load addr:$src2)))),
5170                   _.RC:$src0)),
5171            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5172             _.RC:$src1, addr:$src2)>;
5173  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5174                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5175                                            (load addr:$src2)))),
5176                   _.ImmAllZerosV)),
5177            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5178             addr:$src2)>;
5179}
5180
// Broadcast counterpart of avx512_logical_lowering: matches a vselect_mask of
// type _.VT over a bitconverted IntInfo.VT logic op whose second operand is
// IntInfo.BroadcastLdFrag of addr:$src2, and selects the "rmbk" (fallback is
// $src0) or "rmbkz" (fallback is all-zeros) instruction named by InstrStr.
// Parameters have the same roles as in avx512_logical_lowering.
5181multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5182                                         X86VectorVTInfo _,
5183                                         X86VectorVTInfo IntInfo> {
5184  // Register-broadcast logical operations.
5185  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5186                   (bitconvert
5187                    (IntInfo.VT (OpNode _.RC:$src1,
5188                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5189                   _.RC:$src0)),
5190            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5191             _.RC:$src1, addr:$src2)>;
5192  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5193                   (bitconvert
5194                    (IntInfo.VT (OpNode _.RC:$src1,
5195                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5196                   _.ImmAllZerosV)),
5197            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5198             _.RC:$src1, addr:$src2)>;
5199}
5200
// Instantiates avx512_logical_lowering once per vector width.
//   InstrStr   - name prefix; "Z128", "Z256" or "Z" is appended per width.
//   SelectInfo - per-width type info for the vselect type.
//   IntInfo    - per-width type info for the logic-op type.
// The 128- and 256-bit patterns are guarded by HasVLX; the 512-bit patterns
// by HasAVX512.
5201multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5202                                         AVX512VLVectorVTInfo SelectInfo,
5203                                         AVX512VLVectorVTInfo IntInfo> {
5204let Predicates = [HasVLX] in {
5205  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5206                                 IntInfo.info128>;
5207  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5208                                 IntInfo.info256>;
5209}
5210let Predicates = [HasAVX512] in {
5211  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5212                                 IntInfo.info512>;
5213}
5214}
5215
// Instantiates avx512_logical_lowering_bcast once per vector width, with the
// same suffixing ("Z128"/"Z256"/"Z") and predicate split (HasVLX for 128/256,
// HasAVX512 for 512) as avx512_logical_lowering_sizes.
5216multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5217                                               AVX512VLVectorVTInfo SelectInfo,
5218                                               AVX512VLVectorVTInfo IntInfo> {
5219let Predicates = [HasVLX] in {
5220  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5221                                       SelectInfo.info128, IntInfo.info128>;
5222  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5223                                       SelectInfo.info256, IntInfo.info256>;
5224}
5225let Predicates = [HasAVX512] in {
5226  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5227                                       SelectInfo.info512, IntInfo.info512>;
5228}
5229}
5230
// Enumerates the (vselect type, logic-op type) pairings for one logic
// operation. InstrStr is the mnemonic prefix without the element-size letter;
// "D" is appended when the vselect type has 32-bit elements (i32/f32) and "Q"
// when it has 64-bit elements (i64/f64). The same-type integer pairings
// (i32/i32, i64/i64) are not instantiated here.
5231multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5232  // i64 vselect with i32/i16/i8 logic op
5233  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5234                                       avx512vl_i32_info>;
5235  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5236                                       avx512vl_i16_info>;
5237  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5238                                       avx512vl_i8_info>;
5239
5240  // i32 vselect with i64/i16/i8 logic op
5241  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5242                                       avx512vl_i64_info>;
5243  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5244                                       avx512vl_i16_info>;
5245  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5246                                       avx512vl_i8_info>;
5247
5248  // f32 vselect with i64/i32/i16/i8 logic op
5249  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5250                                       avx512vl_i64_info>;
5251  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5252                                       avx512vl_i32_info>;
5253  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5254                                       avx512vl_i16_info>;
5255  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5256                                       avx512vl_i8_info>;
5257
5258  // f64 vselect with i64/i32/i16/i8 logic op
5259  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5260                                       avx512vl_i64_info>;
5261  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5262                                       avx512vl_i32_info>;
5263  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5264                                       avx512vl_i16_info>;
5265  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5266                                       avx512vl_i8_info>;
5267
  // Broadcast-load forms: only f32 vselect over an i32 logic op and f64
  // vselect over an i64 logic op are instantiated.
5268  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5269                                             avx512vl_f32_info,
5270                                             avx512vl_i32_info>;
5271  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5272                                             avx512vl_f64_info,
5273                                             avx512vl_i64_info>;
5274}
5275
// Instantiate the mixed-type masked-logic patterns for each logic node,
// pairing it with the instruction name prefix it should select.
5276defm : avx512_logical_lowering_types<"VPAND", and>;
5277defm : avx512_logical_lowering_types<"VPOR",  or>;
5278defm : avx512_logical_lowering_types<"VPXOR", xor>;
5279defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5280
5281//===----------------------------------------------------------------------===//
5282// AVX-512  FP arithmetic
5283//===----------------------------------------------------------------------===//
5284
// Scalar FP binary operation. Emits four records, all with
// ExeDomain = _.ExeDomain, Uses = [MXCSR] and mayRaiseFPException = 1:
//   rr_Int / rm_Int - built through AVX512_maskable_scalar (defined elsewhere
//                     in this file) on _.RC operands, matching VecNode.
//   rr / rm         - plain `I` records on _.FRC operands, matching OpNode;
//                     marked isCodeGenOnly and predicated on HasAVX512.
// Parameters:
//   opc          - opcode byte shared by all four forms.
//   OpcodeStr    - mnemonic, used as given (no suffix is appended here).
//   _            - type info (ExeDomain, RC, FRC, VT, memory operands/frags).
//   OpNode       - node matched by the _.FRC forms.
//   VecNode      - node matched by the _.RC (_Int) forms.
//   sched        - scheduling info; the memory forms use sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - applied to `rr` only.
5285multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5286                            SDNode OpNode, SDNode VecNode,
5287                            X86FoldableSchedWrite sched, bit IsCommutable> {
5288  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5289  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5290                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5291                           "$src2, $src1", "$src1, $src2",
5292                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5293                           Sched<[sched]>;
5294
5295  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5296                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5297                         "$src2, $src1", "$src1, $src2",
5298                         (_.VT (VecNode _.RC:$src1,
5299                                        (_.ScalarIntMemFrags addr:$src2)))>,
5300                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5301  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5302  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5303                         (ins _.FRC:$src1, _.FRC:$src2),
5304                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5305                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5306                          Sched<[sched]> {
5307    let isCommutable = IsCommutable;
5308  }
  // The memory form does not set isCommutable.
5309  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5310                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5311                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5312                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5313                         (_.ScalarLdFrag addr:$src2)))]>,
5314                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5315  }
5316  }
5317}
5318
// Scalar FP binary operation with an explicit rounding-control operand.
// Emits rrb_Int through AVX512_maskable_scalar: register-register on _.RC
// with an extra AVX512RC:$rc operand, matching VecNode with $rc passed as an
// (i32 timm). Tagged EVEX_B and EVEX_RC, with Uses = [MXCSR].
// NOTE(review): IsCommutable is accepted (default 0) but is not referenced
// anywhere in this body.
5319multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5320                                  SDNode VecNode, X86FoldableSchedWrite sched,
5321                                  bit IsCommutable = 0> {
5322  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5323  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5324                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5325                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5326                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5327                          (i32 timm:$rc))>,
5328                          EVEX_B, EVEX_RC, Sched<[sched]>;
5329}
// Scalar FP binary operation with an additional {sae} register form.
// Emits five records, all with ExeDomain = _.ExeDomain:
//   rr_Int / rm_Int - AVX512_maskable_scalar forms on _.RC matching VecNode,
//                     tagged SIMD_EXC.
//   rr / rm         - isCodeGenOnly `I` records on _.FRC matching OpNode,
//                     predicated on HasAVX512, with Uses = [MXCSR],
//                     mayRaiseFPException = 1 and an EVEX2VEXOverride whose
//                     name is EVEX2VexOvrd with "rr"/"rm" appended.
//   rrb_Int         - AVX512_maskable_scalar form matching SaeNode, with
//                     "{sae}" in the assembly string, tagged EVEX_B.
// IsCommutable is applied to `rr` only.
5330multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5331                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5332                                X86FoldableSchedWrite sched, bit IsCommutable,
5333                                string EVEX2VexOvrd> {
5334  let ExeDomain = _.ExeDomain in {
5335  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5336                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5337                           "$src2, $src1", "$src1, $src2",
5338                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5339                           Sched<[sched]>, SIMD_EXC;
5340
5341  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5342                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5343                         "$src2, $src1", "$src1, $src2",
5344                         (_.VT (VecNode _.RC:$src1,
5345                                        (_.ScalarIntMemFrags addr:$src2)))>,
5346                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5347
5348  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5349      Uses = [MXCSR], mayRaiseFPException = 1 in {
5350  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5351                         (ins _.FRC:$src1, _.FRC:$src2),
5352                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5353                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5354                          Sched<[sched]>,
5355                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5356    let isCommutable = IsCommutable;
5357  }
5358  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5359                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5360                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5361                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5362                         (_.ScalarLdFrag addr:$src2)))]>,
5363                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5364                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5365  }
5366
  // The {sae} form sets Uses = [MXCSR] only; mayRaiseFPException is not set
  // on it in this multiclass.
5367  let Uses = [MXCSR] in
5368  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5369                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5370                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5371                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5372                            EVEX_B, Sched<[sched]>;
5373  }
5374}
5375
// Scalar single/double wrapper for operations that have a rounding-control
// form. For each of SSZ (f32x_info, mnemonic suffix "ss") and SDZ (f64x_info,
// suffix "sd") it combines avx512_fp_scalar (OpNode/VecNode) with
// avx512_fp_scalar_round (RndNode) under one defm.
//   sched - scheduling sizes; sched.PS.Scl is used for SSZ and sched.PD.Scl
//           for SDZ.
// The SDZ variant additionally carries VEX_W and uses EVEX_CD8<64, ...>
// instead of EVEX_CD8<32, ...>.
5376multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5377                                SDNode VecNode, SDNode RndNode,
5378                                X86SchedWriteSizes sched, bit IsCommutable> {
5379  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5380                              sched.PS.Scl, IsCommutable>,
5381             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5382                              sched.PS.Scl, IsCommutable>,
5383                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5384  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5385                              sched.PD.Scl, IsCommutable>,
5386             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5387                              sched.PD.Scl, IsCommutable>,
5388                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5389}
5390
// Scalar single/double wrapper for operations that have a {sae} form.
// Instantiates avx512_fp_scalar_sae for SSZ (f32x_info, suffix "ss") and SDZ
// (f64x_info, suffix "sd"). The EVEX2VEX override prefix passed down is
// NAME#"SS" / NAME#"SD", i.e. it is derived from the name of the outer defm.
5391multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5392                              SDNode VecNode, SDNode SaeNode,
5393                              X86SchedWriteSizes sched, bit IsCommutable> {
5394  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5395                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5396                              NAME#"SS">,
5397                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5398  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5399                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5400                              NAME#"SD">,
5401                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5402}
// Scalar FP arithmetic instantiations. add/mul/sub/div use the
// rounding-control wrapper; min/max use the {sae} wrapper. Only add and mul
// pass IsCommutable = 1.
5403defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5404                                 SchedWriteFAddSizes, 1>;
5405defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5406                                 SchedWriteFMulSizes, 1>;
5407defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5408                                 SchedWriteFAddSizes, 0>;
5409defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5410                                 SchedWriteFDivSizes, 0>;
5411defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5412                               SchedWriteFCmpSizes, 0>;
5413defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5414                               SchedWriteFCmpSizes, 0>;
5415
5416// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5417// X86fminc and X86fmaxc instead of X86fmin and X86fmax
//
// avx512_comutable_binop_s emits the codegen-only scalar rr/rm records for
// such a commutable node: `I` records on _.FRC operands matching OpNode,
// predicated on HasAVX512, each with an EVEX2VEXOverride named EVEX2VEXOvrd
// with "rr"/"rm" appended. `rr` is unconditionally marked isCommutable = 1;
// `rm` is not. Uses/mayRaiseFPException are not set inside this multiclass.
5418multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5419                                    X86VectorVTInfo _, SDNode OpNode,
5420                                    X86FoldableSchedWrite sched,
5421                                    string EVEX2VEXOvrd> {
5422  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5423  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5424                         (ins _.FRC:$src1, _.FRC:$src2),
5425                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5426                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5427                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5428    let isCommutable = 1;
5429  }
5430  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5431                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5432                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5433                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5434                         (_.ScalarLdFrag addr:$src2)))]>,
5435                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5436                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5437  }
5438}
// Commutable scalar min/max instantiations. They reuse the opcodes (0x5D /
// 0x5F) and mnemonics ("vminss", "vminsd", "vmaxss", "vmaxsd") of the VMIN /
// VMAX scalar forms but match X86fminc / X86fmaxc. Each is tagged SIMD_EXC at
// the defm; the f64 variants additionally carry VEX_W.
5439defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5440                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5441                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5442
5443defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5444                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5445                                         VEX_W, EVEX_4V, VEX_LIG,
5446                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5447
5448defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5449                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5450                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5451
5452defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5453                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5454                                         VEX_W, EVEX_4V, VEX_LIG,
5455                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5456
// Packed FP binary operation for one vector type. Emits rr, rm and rmb
// through AVX512_maskable_split (defined elsewhere in this file), all with
// ExeDomain = _.ExeDomain, hasSideEffects = 0, Uses = [MXCSR] and
// mayRaiseFPException = 1; rm and rmb additionally set mayLoad = 1.
//   OpcodeStr     - mnemonic stem; _.Suffix is appended.
//   OpNode        - first pattern operator handed to AVX512_maskable_split.
//   MaskOpNode    - second pattern operator handed to AVX512_maskable_split
//                   (presumably the one used for the masked forms - confirm
//                   against AVX512_maskable_split).
//   IsCommutable  - forwarded for the rr form only.
//   IsKCommutable - defaults to IsCommutable; forwarded for the rr form only.
// rmb takes a _.ScalarMemOp operand, matches _.BroadcastLdFrag, prints the
// operand with _.BroadcastStr appended, and is tagged EVEX_B.
5457multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5458                            SDPatternOperator MaskOpNode,
5459                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5460                            bit IsCommutable,
5461                            bit IsKCommutable = IsCommutable> {
5462  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5463      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // NOTE(review): IsKCommutable is passed for both of the last two positional
  // arguments; confirm their meaning against AVX512_maskable_split.
5464  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5465                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5466                  "$src2, $src1", "$src1, $src2",
5467                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5468                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5469                  IsKCommutable, IsKCommutable>,
5470                  EVEX_4V, Sched<[sched]>;
5471  let mayLoad = 1 in {
5472    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5473                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5474                    "$src2, $src1", "$src1, $src2",
5475                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5476                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5477                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5478    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5479                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5480                     "${src2}"#_.BroadcastStr#", $src1",
5481                     "$src1, ${src2}"#_.BroadcastStr,
5482                     (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5483                     (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5484                     EVEX_4V, EVEX_B,
5485                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5486    }
5487  }
5488}
5489
// Packed FP binary operation with an explicit rounding-control operand.
// Emits rrb through AVX512_maskable: register-register on _.RC with an extra
// AVX512RC:$rc operand, matching OpNodeRnd with $rc as an (i32 timm).
// Tagged EVEX_4V, EVEX_B and EVEX_RC, with Uses = [MXCSR]. _.Suffix is
// appended to OpcodeStr.
5490multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5491                                  SDPatternOperator OpNodeRnd,
5492                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5493  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5494  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5495                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
5496                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5497                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5498                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5499}
5500
// Packed FP binary operation with a {sae} form. Emits rrb through
// AVX512_maskable: register-register on _.RC matching OpNodeSAE, with "{sae}"
// in the assembly string. Tagged EVEX_4V and EVEX_B (no EVEX_RC and no
// rounding operand, unlike avx512_fp_round_packed), with Uses = [MXCSR].
5501multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5502                                SDPatternOperator OpNodeSAE,
5503                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5504  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5505  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5506                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5507                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5508                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5509                  EVEX_4V, EVEX_B, Sched<[sched]>;
5510}
5511
// Packed FP binary operation across all vector widths and both element types.
// Instantiates avx512_fp_packed as PSZ/PDZ (512-bit, predicated on prd) and
// PSZ128/PSZ256/PDZ128/PDZ256 (predicated on prd and HasVLX).
//   prd               - feature predicate required by every width.
//   sched             - scheduling sizes; indexed by .PS/.PD and
//                       .XMM/.YMM/.ZMM.
//   IsCommutable      - forwarded to avx512_fp_packed (default 0).
//   IsPD128Commutable - defaults to IsCommutable; used only for PDZ128.
// PD variants carry VEX_W and EVEX_CD8<64, CD8VF>; PS variants use
// EVEX_CD8<32, CD8VF>.
5512multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5513                             SDPatternOperator MaskOpNode,
5514                             Predicate prd, X86SchedWriteSizes sched,
5515                             bit IsCommutable = 0,
5516                             bit IsPD128Commutable = IsCommutable> {
5517  let Predicates = [prd] in {
5518  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5519                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5520                              EVEX_CD8<32, CD8VF>;
5521  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5522                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5523                              EVEX_CD8<64, CD8VF>;
5524  }
5525
5526    // Define only if AVX512VL feature is present.
5527  let Predicates = [prd, HasVLX] in {
5528    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5529                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5530                                   EVEX_CD8<32, CD8VF>;
5531    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5532                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5533                                   EVEX_CD8<32, CD8VF>;
    // PDZ128 is the only instantiation that passes both commutability flags
    // explicitly: avx512_fp_packed's IsCommutable receives IsPD128Commutable
    // and its IsKCommutable receives IsCommutable.
5534    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5535                                   sched.PD.XMM, IsPD128Commutable,
5536                                   IsCommutable>, EVEX_V128, PD, VEX_W,
5537                                   EVEX_CD8<64, CD8VF>;
5538    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5539                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5540                                   EVEX_CD8<64, CD8VF>;
5541  }
5542}
5543
// 512-bit rounding-control forms of a packed FP binary operation: PSZ
// (v16f32_info) and PDZ (v8f64_info) via avx512_fp_round_packed. No 128/256
// bit variants and no Predicates are set here. The whole multiclass is
// wrapped in `let Uses = [MXCSR]`.
5544let Uses = [MXCSR] in
5545multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5546                                   X86SchedWriteSizes sched> {
5547  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5548                                    v16f32_info>,
5549                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5550  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5551                                    v8f64_info>,
5552                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5553}
5554
// 512-bit {sae} forms of a packed FP binary operation: PSZ (v16f32_info) and
// PDZ (v8f64_info) via avx512_fp_sae_packed. The node parameter is named
// OpNodeRnd here but is forwarded as avx512_fp_sae_packed's OpNodeSAE. The
// whole multiclass is wrapped in `let Uses = [MXCSR]`.
5555let Uses = [MXCSR] in
5556multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5557                                 X86SchedWriteSizes sched> {
5558  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5559                                  v16f32_info>,
5560                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5561  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5562                                  v8f64_info>,
5563                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5564}
5565
// Packed FP arithmetic instantiations. add/mul/sub/div pair the any_* node
// (OpNode) with the plain node (MaskOpNode) and add rounding-control forms;
// min/max use the same X86 node for both and add {sae} forms. Only add and
// mul pass IsCommutable = 1; sub and div rely on the default of 0.
5566defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5567                              SchedWriteFAddSizes, 1>,
5568            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5569defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5570                              SchedWriteFMulSizes, 1>,
5571            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5572defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5573                              SchedWriteFAddSizes>,
5574            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5575defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5576                              SchedWriteFDivSizes>,
5577            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5578defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5579                              SchedWriteFCmpSizes, 0>,
5580            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5581defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5582                              SchedWriteFCmpSizes, 0>,
5583            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max: same opcodes and mnemonics as VMIN/VMAX, matching
// X86fminc/X86fmaxc with IsCommutable = 1. Marked isCodeGenOnly; no {sae}
// forms are added for these.
5584let isCodeGenOnly = 1 in {
5585  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5586                                 SchedWriteFCmpSizes, 1>;
5587  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5588                                 SchedWriteFCmpSizes, 1>;
5589}
// FP-domain logic instructions. They are predicated on HasDQI and pass
// null_frag for both OpNode and MaskOpNode. The enclosing `let` sets an empty
// Uses list and mayRaiseFPException = 0 - presumably to override the
// Uses = [MXCSR] / mayRaiseFPException = 1 that avx512_fp_packed applies;
// confirm against TableGen's let-scoping rules. VANDN is the only one with
// IsCommutable = 0.
5590let Uses = []<Register>, mayRaiseFPException = 0 in {
5591defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5592                               SchedWriteFLogicSizes, 1>;
5593defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5594                               SchedWriteFLogicSizes, 0>;
5595defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5596                               SchedWriteFLogicSizes, 1>;
5597defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5598                               SchedWriteFLogicSizes, 1>;
5599}
5600
// Packed scalef-style operation for one vector type. Emits rr, rm and rmb
// through AVX512_maskable, all matching the single OpNode, with
// ExeDomain = _.ExeDomain, Uses = [MXCSR] and mayRaiseFPException = 1.
// _.Suffix is appended to OpcodeStr. rmb takes a _.ScalarMemOp operand,
// matches _.BroadcastLdFrag and is tagged EVEX_B. No commutability flag is
// passed for any form, and mayLoad is not set explicitly here.
5601multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5602                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5603  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5604  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5605                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5606                  "$src2, $src1", "$src1, $src2",
5607                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5608                  EVEX_4V, Sched<[sched]>;
5609  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5610                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5611                  "$src2, $src1", "$src1, $src2",
5612                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5613                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5614  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5615                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5616                   "${src2}"#_.BroadcastStr#", $src1",
5617                   "$src1, ${src2}"#_.BroadcastStr,
5618                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5619                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5620  }
5621}
5622
// Scalar scalef-style operation. Emits rr and rm through
// AVX512_maskable_scalar on _.RC operands, matching OpNode, with
// ExeDomain = _.ExeDomain, Uses = [MXCSR] and mayRaiseFPException = 1.
// _.Suffix is appended to OpcodeStr, so callers pass the unsuffixed stem.
// The memory form takes _.IntScalarMemOp and matches _.ScalarIntMemFrags.
// Unlike avx512_fp_scalar there are no codegen-only _.FRC forms.
5623multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5624                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5625  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5626  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5627                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5628                  "$src2, $src1", "$src1, $src2",
5629                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5630                  Sched<[sched]>;
5631  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5632                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5633                  "$src2, $src1", "$src1, $src2",
5634                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5635                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5636  }
5637}
5638
// All scalef variants: 512-bit packed (PSZ/PDZ), scalar (SSZ/SDZ), and
// 128/256-bit packed (PSZ128/PSZ256/PDZ128/PDZ256, predicated on HasVLX).
//   opc       - opcode of the packed forms.
//   opcScaler - opcode of the scalar forms.
//   OpcodeStr - mnemonic stem.
//   sched     - scheduling widths; .ZMM/.YMM/.XMM for packed, .Scl for scalar.
// The 512-bit and scalar forms also get a rounding-control variant; the
// 128/256-bit forms do not. No Predicates are set in this multiclass for the
// 512-bit and scalar forms.
5639multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5640                                X86SchedWriteWidths sched> {
5641  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5642             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5643                              EVEX_V512, EVEX_CD8<32, CD8VF>;
5644  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5645             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5646                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  // avx512_fp_scalef_scalar appends _.Suffix itself, so it receives the bare
  // OpcodeStr; avx512_fp_scalar_round uses OpcodeStr as given, so "ss"/"sd"
  // is appended explicitly for it.
5647  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5648             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5649                                    X86scalefsRnd, sched.Scl>,
5650                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5651  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5652             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5653                                    X86scalefsRnd, sched.Scl>,
5654                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5655
5656  // Define only if AVX512VL feature is present.
5657  let Predicates = [HasVLX] in {
5658    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5659                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
5660    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5661                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
5662    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5663                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5664    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5665                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5666  }
5667}
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D, scheduled with
// SchedWriteFAdd. Every generated record is tagged T8PD and
// NotEVEX2VEXConvertible.
5668defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5669                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5670
5671//===----------------------------------------------------------------------===//
5672// AVX-512  VPTESTM instructions
5673//===----------------------------------------------------------------------===//
5674
// Register-register (rr) and register-memory (rm) forms of a vptest-style
// instruction for one vector type, built through AVX512_maskable_cmp. The
// result is written to the mask register class _.KRC. Both forms pass
// null_frag for their patterns (see the NOTE below) and are marked
// hasSideEffects = 0; rm is additionally marked mayLoad = 1 and carries
// EVEX_CD8<_.EltSize, CD8VF>. rr passes a trailing 1 to AVX512_maskable_cmp
// (presumably its commutable flag - confirm against that multiclass).
// NOTE(review): the `Name` parameter is not referenced in this body.
5675multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5676                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
5677                         string Name> {
5678  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5679  // There are just too many permutations due to commutability and bitcasts.
5680  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5681  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5682                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5683                      "$src2, $src1", "$src1, $src2",
5684                   (null_frag), (null_frag), 1>,
5685                   EVEX_4V, Sched<[sched]>;
5686  let mayLoad = 1 in
5687  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5688                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5689                       "$src2, $src1", "$src1, $src2",
5690                   (null_frag), (null_frag)>,
5691                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5692                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5693  }
5694}
5695
// Broadcast-memory (rmb) form of a vptest-style instruction, built through
// AVX512_maskable_cmp. Takes a _.ScalarMemOp operand, prints it with
// _.BroadcastStr appended, writes _.KRC, and passes null_frag for its
// patterns. Marked mayLoad = 1 and hasSideEffects = 0; tagged EVEX_B,
// EVEX_4V and EVEX_CD8<_.EltSize, CD8VF>.
5696multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5697                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5698  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5699  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5700                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5701                    "${src2}"#_.BroadcastStr#", $src1",
5702                    "$src1, ${src2}"#_.BroadcastStr,
5703                    (null_frag), (null_frag)>,
5704                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5705                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5706}
5707
// Instantiates the rr/rm (avx512_vptest) and rmb (avx512_vptest_mb) forms for
// the three vector widths described by `_`:
//   Z    - _.info512, sched.ZMM, EVEX_V512, requires HasAVX512.
//   Z256 - _.info256, sched.YMM, EVEX_V256, requires HasAVX512 and HasVLX.
//   Z128 - _.info128, sched.XMM, EVEX_V128, requires HasAVX512 and HasVLX.
// NAME is forwarded as the `Name` argument of avx512_vptest.
5708multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5709                                  X86SchedWriteWidths sched,
5710                                  AVX512VLVectorVTInfo _> {
5711  let Predicates  = [HasAVX512] in
5712  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5713           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5714
5715  let Predicates = [HasAVX512, HasVLX] in {
5716  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5717              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5718  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5719              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5720  }
5721}
5722
// Instantiates the dword (D) and qword (Q) element variants of a VPTEST-style
// instruction.  The mnemonic gets a "d" or "q" suffix and the type info is
// avx512vl_i32_info or avx512vl_i64_info respectively; the Q variant is
// additionally tagged VEX_W.
5723multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5724                            X86SchedWriteWidths sched> {
5725  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5726                                 avx512vl_i32_info>;
5727  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5728                                 avx512vl_i64_info>, VEX_W;
5729}
5730
// Instantiates the word (W) and byte (B) element variants of a VPTEST-style
// instruction.  Only avx512_vptest (rr/rm) is used; no broadcast (rmb) form is
// created for these element sizes.
//   512-bit forms require HasBWI; 256/128-bit forms require HasVLX and HasBWI.
//   Word forms are tagged VEX_W; byte forms are not.
//   The `Name` argument passed down is NAME with a "W" or "B" suffix.
5731multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5732                            X86SchedWriteWidths sched> {
5733  let Predicates = [HasBWI] in {
5734  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5735                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5736  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5737                            v64i8_info, NAME#"B">, EVEX_V512;
5738  }
5739  let Predicates = [HasVLX, HasBWI] in {
5740
5741  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5742                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5743  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5744                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5745  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5746                            v32i8x_info, NAME#"B">, EVEX_V256;
5747  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5748                            v16i8x_info, NAME#"B">, EVEX_V128;
5749  }
5750}
5751
// Combines the word/byte variants (opcode opc_wb) and the dword/qword variants
// (opcode opc_dq) of a VPTEST-style instruction under one defm.  The multiclass
// has no body of its own; everything comes from the two parent multiclasses.
5752multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5753                                   X86SchedWriteWidths sched> :
5754  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5755  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5756
// VPTESTM and VPTESTNM use the same opcode bytes (0x26 for byte/word, 0x27 for
// dword/qword) and the same scheduling classes; the two families differ in
// their mnemonic and in the T8PD vs. T8XS tag.
5757defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5758                                         SchedWriteVecLogic>, T8PD;
5759defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5760                                         SchedWriteVecLogic>, T8XS;
5761
5762//===----------------------------------------------------------------------===//
5763// AVX-512  Shift instructions
5764//===----------------------------------------------------------------------===//
5765
// Defines the register (ri) and memory (mi) forms of an operation that takes
// one vector source and an 8-bit immediate.
//   opc       - opcode byte.
//   ImmFormR  - instruction format used by the register form.
//   ImmFormM  - instruction format used by the memory form.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node matched as (OpNode src, (i8 timm)).
//   sched     - scheduling class; the memory form uses sched.Folded.
//   _         - vector type info for source and destination.
5766multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5767                            string OpcodeStr, SDNode OpNode,
5768                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5769  let ExeDomain = _.ExeDomain in {
5770  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5771                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5772                      "$src2, $src1", "$src1, $src2",
5773                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5774                   Sched<[sched]>;
  // Memory form: the vector source is loaded through _.LdFrag.
5775  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5776                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5777                       "$src2, $src1", "$src1, $src2",
5778                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5779                          (i8 timm:$src2)))>,
5780                   Sched<[sched.Folded]>;
5781  }
5782}
5783
// Defines the broadcast-memory (mbi) form of a vector-plus-immediate
// operation.  The source is a scalar memory operand (_.ScalarMemOp) matched
// through _.BroadcastLdFrag; the assembly string appends _.BroadcastStr to it
// and the record is tagged EVEX_B.
//   opc       - opcode byte.
//   ImmFormM  - instruction format of the memory form.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node matched as (OpNode broadcast-load, (i8 timm)).
//   sched     - scheduling class; sched.Folded is used.
//   _         - vector type info for the destination.
5784multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5785                             string OpcodeStr, SDNode OpNode,
5786                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5787  let ExeDomain = _.ExeDomain in
5788  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5789                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5790      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5791     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5792     EVEX_B, Sched<[sched.Folded]>;
5793}
5794
// Defines the rr and rm forms of a shift whose second operand is held in a
// 128-bit register (VR128X) or a 128-bit memory location (i128mem), regardless
// of the width of the vector being shifted.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node matched as (OpNode src1, src2).
//   sched     - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
//   SrcVT     - value type given to the 128-bit second operand in the pattern.
//   _         - vector type info for $src1 and the destination.
5795multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5796                            X86FoldableSchedWrite sched, ValueType SrcVT,
5797                            X86VectorVTInfo _> {
5798   // src2 is always 128-bit
5799  let ExeDomain = _.ExeDomain in {
5800  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5801                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5802                      "$src2, $src1", "$src1, $src2",
5803                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5804                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  // Memory form: the second operand is loaded with a plain `load` of SrcVT.
5805  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5806                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5807                       "$src2, $src1", "$src1, $src2",
5808                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5809                   AVX512BIBase,
5810                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5811  }
5812}
5813
// Instantiates avx512_shift_rrm for the 512-, 256- and 128-bit widths of
// VTInfo.
//   Z    - requires `prd`; Z256 and Z128 require `prd` and HasVLX.
//   SrcVT is forwarded unchanged as the type of the 128-bit second operand.
// The EVEX_CD8 tuple differs per width (CD8VQ, CD8VH, CD8VF) while the memory
// operand in avx512_shift_rrm is always i128mem.
// NOTE(review): presumably these denote quarter/half/full vector relative to
// the destination width -- confirm against the CD8 tuple definitions.
5814multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5815                              X86SchedWriteWidths sched, ValueType SrcVT,
5816                              AVX512VLVectorVTInfo VTInfo,
5817                              Predicate prd> {
5818  let Predicates = [prd] in
5819  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5820                               VTInfo.info512>, EVEX_V512,
5821                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5822  let Predicates = [prd, HasVLX] in {
5823  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5824                               VTInfo.info256>, EVEX_V256,
5825                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5826  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5827                               VTInfo.info128>, EVEX_V128,
5828                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5829  }
5830}
5831
// Instantiates the dword (D), qword (Q) and word (W) variants of a shift whose
// second operand is a 128-bit register or memory operand.
//   opcd/opcq/opcw - opcode bytes for the D, Q and W variants.
//   OpcodeStr      - mnemonic stem; "d", "q" or "w" is appended.
//   OpNode         - DAG node forwarded to avx512_shift_sizes.
//   sched          - per-width scheduling classes.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible on the
//                    Q variant only (default 0).
// D and Q require HasAVX512, W requires HasBWI; Q is additionally VEX_W.
5832multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5833                              string OpcodeStr, SDNode OpNode,
5834                              X86SchedWriteWidths sched,
5835                              bit NotEVEX2VEXConvertibleQ = 0> {
5836  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5837                              avx512vl_i32_info, HasAVX512>;
  // This `let` has no braces, so it applies to the Q defm only.
5838  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5839  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5840                              avx512vl_i64_info, HasAVX512>, VEX_W;
5841  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5842                              avx512vl_i16_info, HasBWI>;
5843}
5844
// Instantiates the immediate forms (ri/mi from avx512_shift_rmi and mbi from
// avx512_shift_rmbi) for the three vector widths of VTInfo.
//   Z    - info512, sched.ZMM, EVEX_V512, requires HasAVX512.
//   Z256 - info256, sched.YMM, EVEX_V256, requires HasAVX512 and HasVLX.
//   Z128 - info128, sched.XMM, EVEX_V128, requires HasAVX512 and HasVLX.
5845multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5846                                  string OpcodeStr, SDNode OpNode,
5847                                  X86SchedWriteWidths sched,
5848                                  AVX512VLVectorVTInfo VTInfo> {
5849  let Predicates = [HasAVX512] in
5850  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5851                              sched.ZMM, VTInfo.info512>,
5852             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5853                               VTInfo.info512>, EVEX_V512;
5854  let Predicates = [HasAVX512, HasVLX] in {
5855  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5856                              sched.YMM, VTInfo.info256>,
5857             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5858                               VTInfo.info256>, EVEX_V256;
5859  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5860                              sched.XMM, VTInfo.info128>,
5861             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5862                               VTInfo.info128>, EVEX_V128;
5863  }
5864}
5865
// Instantiates the word-element immediate forms (ri/mi only) for the 512-,
// 256- and 128-bit widths.  No broadcast (mbi) form is created here.
//   WZ requires HasBWI; WZ256 and WZ128 require HasVLX and HasBWI.
//   All three are tagged VEX_WIG.
// OpcodeStr is used as given; callers pass the full mnemonic including the "w".
5866multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5867                              string OpcodeStr, SDNode OpNode,
5868                              X86SchedWriteWidths sched> {
5869  let Predicates = [HasBWI] in
5870  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5871                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5872  let Predicates = [HasVLX, HasBWI] in {
5873  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5874                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5875  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5876                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5877  }
5878}
5879
// Instantiates the dword (D) and qword (Q) immediate forms of a shift/rotate.
//   opcd/opcq - opcode bytes for the D and Q variants.
//   ImmFormR/ImmFormM - register and memory instruction formats.
//   OpcodeStr - mnemonic stem; "d" or "q" is appended.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible on the
//               Q variant only (default 0).
// D uses EVEX_CD8<32, CD8VF>; Q uses EVEX_CD8<64, CD8VF> and VEX_W.
5880multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5881                               Format ImmFormR, Format ImmFormM,
5882                               string OpcodeStr, SDNode OpNode,
5883                               X86SchedWriteWidths sched,
5884                               bit NotEVEX2VEXConvertibleQ = 0> {
5885  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5886                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  // This `let` has no braces, so it applies to the Q defm only.
5887  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5888  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5889                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5890}
5891
// Shift/rotate by immediate.  The five families share opcode bytes (0x72/0x73
// for dword/qword, 0x71 for word) and are told apart by the MRM<n>r/MRM<n>m
// format passed in: MRM2 (VPSRL), MRM6 (VPSLL), MRM4 (VPSRA), MRM0 (VPROR) and
// MRM1 (VPROL).  VPSRA, VPROR and VPROL pass 0x72 for both D and Q; the Q
// variant is then distinguished by the VEX_W that avx512_shift_rmi_dq adds.
// VPSRA also passes 1 for NotEVEX2VEXConvertibleQ.  The rotates have no word
// variant.
5892defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5893                                 SchedWriteVecShiftImm>,
5894             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5895                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5896
5897defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5898                                 SchedWriteVecShiftImm>,
5899             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5900                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5901
5902defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5903                                 SchedWriteVecShiftImm, 1>,
5904             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5905                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5906
5907defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5908                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5909defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5910                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5911
// Shift by a count held in a 128-bit register or memory operand.  Arguments
// are the D, Q and W opcode bytes in that order; VPSRA reuses 0xE2 for D and Q
// and passes 1 for NotEVEX2VEXConvertibleQ.
5912defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5913                                SchedWriteVecShift>;
5914defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5915                                SchedWriteVecShift, 1>;
5916defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5917                                SchedWriteVecShift>;
5918
// Each pattern below widens the 128/256-bit source into a v8i64 by inserting
// it into an IMPLICIT_DEF at sub_xmm/sub_ymm, runs the 512-bit VPSRAQZrr or
// VPSRAQZri, and extracts the same sub-register from the result.  The upper
// lanes are undefined on input and are discarded on output.  The shift-count
// operand ($src2) is passed through unchanged.
5919// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
5920let Predicates = [HasAVX512, NoVLX] in {
5921  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5922            (EXTRACT_SUBREG (v8i64
5923              (VPSRAQZrr
5924                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5925                 VR128X:$src2)), sub_ymm)>;
5926
5927  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5928            (EXTRACT_SUBREG (v8i64
5929              (VPSRAQZrr
5930                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5931                 VR128X:$src2)), sub_xmm)>;
5932
5933  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5934            (EXTRACT_SUBREG (v8i64
5935              (VPSRAQZri
5936                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5937                 timm:$src2)), sub_ymm)>;
5938
5939  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5940            (EXTRACT_SUBREG (v8i64
5941              (VPSRAQZri
5942                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5943                 timm:$src2)), sub_xmm)>;
5944}
5945
5946//===-------------------------------------------------------------------===//
5947// Variable Bit Shifts
5948//===-------------------------------------------------------------------===//
5949
// Defines the rr and rm forms of a two-source vector operation in which both
// sources and the destination have the same vector type `_`.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node matched as (OpNode src1, src2).
//   sched     - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
//   _         - vector type info.
// Besides the variable shifts, this multiclass is reused further down in this
// file for permutes and byte shuffles.
5950multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5951                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5952  let ExeDomain = _.ExeDomain in {
5953  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5954                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5955                      "$src2, $src1", "$src1, $src2",
5956                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5957                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  // Memory form: $src2 is a full-vector load through _.LdFrag.
5958  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5959                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5960                       "$src2, $src1", "$src1, $src2",
5961                   (_.VT (OpNode _.RC:$src1,
5962                   (_.VT (_.LdFrag addr:$src2))))>,
5963                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5964                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5965  }
5966}
5967
// Defines the broadcast-memory (rmb) form matching avx512_var_shift: $src2 is
// a scalar memory operand (_.ScalarMemOp) matched through _.BroadcastLdFrag.
// The assembly string appends _.BroadcastStr to $src2 and the record is tagged
// EVEX_B.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node matched as (OpNode src1, broadcast-load).
//   sched     - scheduling class; sched.Folded and sched.ReadAfterFold are used.
//   _         - vector type info.
5968multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5969                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5970  let ExeDomain = _.ExeDomain in
5971  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5972                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5973                    "${src2}"#_.BroadcastStr#", $src1",
5974                    "$src1, ${src2}"#_.BroadcastStr,
5975                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5976                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5977                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5978}
5979
// Instantiates the rr/rm (avx512_var_shift) and rmb (avx512_var_shift_mb)
// forms for the three vector widths of `_`:
//   Z    - info512, sched.ZMM, EVEX_V512, requires HasAVX512.
//   Z256 - info256, sched.YMM, EVEX_V256, requires HasAVX512 and HasVLX.
//   Z128 - info128, sched.XMM, EVEX_V128, requires HasAVX512 and HasVLX.
5980multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5981                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5982  let Predicates  = [HasAVX512] in
5983  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5984           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5985
5986  let Predicates = [HasAVX512, HasVLX] in {
5987  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5988              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5989  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5990              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5991  }
5992}
5993
// Instantiates the dword (D) and qword (Q) variants of a per-element variable
// shift/rotate.  Both variants use the same opcode byte `opc`; the mnemonic
// gets a "d" or "q" suffix and the Q variant is additionally tagged VEX_W.
5994multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5995                                  SDNode OpNode, X86SchedWriteWidths sched> {
5996  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5997                                 avx512vl_i32_info>;
5998  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5999                                 avx512vl_i64_info>, VEX_W;
6000}
6001
// Emits two anonymous selection patterns (256-bit and 128-bit) that implement
// OpNode on a narrow vector with the 512-bit register-register instruction.
//   _         - type info triple; info256/info128 give the matched types and
//               info512 gives the type of the widened operands.
//   OpcodeStr - here a record-name prefix, not an assembly mnemonic: the
//               instruction used is !cast<Instruction>(OpcodeStr#"Zrr").
//   OpNode    - DAG node to match.
//   p         - predicate list guarding both patterns.
// Both sources are inserted into IMPLICIT_DEF 512-bit values at
// sub_ymm/sub_xmm and the same sub-register is extracted from the result.
6002// Use 512bit version to implement 128/256 bit in case NoVLX.
6003multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6004                                     SDNode OpNode, list<Predicate> p> {
6005  let Predicates = p in {
6006  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6007                                  (_.info256.VT _.info256.RC:$src2))),
6008            (EXTRACT_SUBREG
6009                (!cast<Instruction>(OpcodeStr#"Zrr")
6010                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6011                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6012             sub_ymm)>;
6013
6014  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6015                                  (_.info128.VT _.info128.RC:$src2))),
6016            (EXTRACT_SUBREG
6017                (!cast<Instruction>(OpcodeStr#"Zrr")
6018                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6019                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6020             sub_xmm)>;
6021  }
6022}
// Instantiates the word-element variable shift (rr/rm only, no broadcast
// form) for the 512-, 256- and 128-bit widths.
//   WZ requires HasBWI; WZ256 and WZ128 require HasVLX and HasBWI.
//   All three are tagged VEX_W.
// OpcodeStr is used as given; callers pass the full mnemonic including the "w".
6023multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6024                              SDNode OpNode, X86SchedWriteWidths sched> {
6025  let Predicates = [HasBWI] in
6026  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6027              EVEX_V512, VEX_W;
6028  let Predicates = [HasVLX, HasBWI] in {
6029
6030  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6031              EVEX_V256, VEX_W;
6032  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6033              EVEX_V128, VEX_W;
6034  }
6035}
6036
// Per-element variable shifts.  Each family combines the dword/qword variants
// (first opcode) with the word variants (second opcode).
6037defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6038              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6039
6040defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6041              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6042
6043defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6044              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6045
// Per-element variable rotates: dword/qword only, matched on the generic
// rotr/rotl nodes.
6046defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6047defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6048
// Without VLX, select the 128/256-bit operations below through the 512-bit
// "<prefix>Zrr" instruction named by the string argument.
6049defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6050defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6051defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6052defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6053
6054
// The first four patterns handle a variable rotate-left (rotl) with the
// 512-bit VPROLVQZrr/VPROLVDZrr; the last four handle rotate-left by immediate
// (X86vrotli) with VPROLQZri/VPROLDZri.  In every case the narrow operands are
// inserted into an IMPLICIT_DEF 512-bit value at sub_xmm/sub_ymm and the same
// sub-register is extracted from the result, so the undefined upper lanes are
// discarded.
6055// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6056let Predicates = [HasAVX512, NoVLX] in {
6057  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6058            (EXTRACT_SUBREG (v8i64
6059              (VPROLVQZrr
6060                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6061                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6062                       sub_xmm)>;
6063  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6064            (EXTRACT_SUBREG (v8i64
6065              (VPROLVQZrr
6066                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6067                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6068                       sub_ymm)>;
6069
6070  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6071            (EXTRACT_SUBREG (v16i32
6072              (VPROLVDZrr
6073                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6074                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6075                        sub_xmm)>;
6076  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6077            (EXTRACT_SUBREG (v16i32
6078              (VPROLVDZrr
6079                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6080                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6081                        sub_ymm)>;
6082
6083  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6084            (EXTRACT_SUBREG (v8i64
6085              (VPROLQZri
6086                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6087                        timm:$src2)), sub_xmm)>;
6088  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6089            (EXTRACT_SUBREG (v8i64
6090              (VPROLQZri
6091                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6092                       timm:$src2)), sub_ymm)>;
6093
6094  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6095            (EXTRACT_SUBREG (v16i32
6096              (VPROLDZri
6097                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6098                        timm:$src2)), sub_xmm)>;
6099  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6100            (EXTRACT_SUBREG (v16i32
6101              (VPROLDZri
6102                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6103                        timm:$src2)), sub_ymm)>;
6104}
6105
// The first four patterns handle a variable rotate-right (rotr) with the
// 512-bit VPRORVQZrr/VPRORVDZrr; the last four handle rotate-right by
// immediate (X86vrotri) with VPRORQZri/VPRORDZri.  In every case the narrow
// operands are inserted into an IMPLICIT_DEF 512-bit value at sub_xmm/sub_ymm
// and the same sub-register is extracted from the result, so the undefined
// upper lanes are discarded.
6106// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6107let Predicates = [HasAVX512, NoVLX] in {
6108  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6109            (EXTRACT_SUBREG (v8i64
6110              (VPRORVQZrr
6111                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6112                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6113                       sub_xmm)>;
6114  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6115            (EXTRACT_SUBREG (v8i64
6116              (VPRORVQZrr
6117                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6118                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6119                       sub_ymm)>;
6120
6121  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6122            (EXTRACT_SUBREG (v16i32
6123              (VPRORVDZrr
6124                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6125                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6126                        sub_xmm)>;
6127  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6128            (EXTRACT_SUBREG (v16i32
6129              (VPRORVDZrr
6130                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6131                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6132                        sub_ymm)>;
6133
6134  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6135            (EXTRACT_SUBREG (v8i64
6136              (VPRORQZri
6137                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6138                        timm:$src2)), sub_xmm)>;
6139  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6140            (EXTRACT_SUBREG (v8i64
6141              (VPRORQZri
6142                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6143                       timm:$src2)), sub_ymm)>;
6144
6145  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6146            (EXTRACT_SUBREG (v16i32
6147              (VPRORDZri
6148                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6149                        timm:$src2)), sub_xmm)>;
6150  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6151            (EXTRACT_SUBREG (v16i32
6152              (VPRORDZri
6153                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6154                        timm:$src2)), sub_ymm)>;
6155}
6156
6157//===-------------------------------------------------------------------===//
6158// 1-src variable permutation VPERMW/D/Q
6159//===-------------------------------------------------------------------===//
6160
// Instantiates a variable permute for the 512-bit (Z) and 256-bit (Z256)
// widths of `_` by reusing avx512_var_shift (rr/rm) and avx512_var_shift_mb
// (rmb).  No 128-bit form is defined here.
//   sched is a single X86FoldableSchedWrite that is used for both widths.
//   Z requires HasAVX512; Z256 requires HasAVX512 and HasVLX.
6161multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6162                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6163  let Predicates  = [HasAVX512] in
6164  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6165           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6166
6167  let Predicates = [HasAVX512, HasVLX] in
6168  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6169              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6170}
6171
// Instantiates an immediate-controlled permute for the 512-bit (Z) and 256-bit
// (Z256) widths of VTInfo by reusing avx512_shift_rmi (ri/mi) and
// avx512_shift_rmbi (mbi).  No 128-bit form is defined here.
//   sched is a single X86FoldableSchedWrite that is used for both widths.
//   Z requires HasAVX512; Z256 requires HasAVX512 and HasVLX.
6172multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6173                                 string OpcodeStr, SDNode OpNode,
6174                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6175  let Predicates = [HasAVX512] in
6176  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6177                              sched, VTInfo.info512>,
6178             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6179                               sched, VTInfo.info512>, EVEX_V512;
6180  let Predicates = [HasAVX512, HasVLX] in
6181  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6182                              sched, VTInfo.info256>,
6183             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6184                               sched, VTInfo.info256>, EVEX_V256;
6185}
6186
// Instantiates a byte/word variable permute (rr/rm only, no broadcast form)
// for the 512-, 256- and 128-bit widths of `_`.
//   prd   - feature predicate required by every form; Z256 and Z128
//           additionally require HasVLX.
//   sched - a single X86FoldableSchedWrite that is used for all three widths.
6187multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6188                              Predicate prd, SDNode OpNode,
6189                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6190  let Predicates = [prd] in
6191  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6192              EVEX_V512 ;
6193  let Predicates = [HasVLX, prd] in {
6194  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6195              EVEX_V256 ;
6196  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6197              EVEX_V128 ;
6198  }
6199}
6200
// Word and byte variable permutes share opcode 0x8D; VPERMW is gated on HasBWI
// and tagged VEX_W, VPERMB is gated on HasVBMI.
6201defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6202                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6203defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6204                               WriteVarShuffle256, avx512vl_i8_info>;
6205
// Dword/qword and single/double variable permutes (512- and 256-bit only).
// The 64-bit element variants reuse the 32-bit opcode and add VEX_W.
6206defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6207                                    WriteVarShuffle256, avx512vl_i32_info>;
6208defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6209                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6210defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6211                                     WriteFVarShuffle256, avx512vl_f32_info>;
6212defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6213                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6214
// Immediate-controlled forms of VPERMQ/VPERMPD.  These add ri/mi/mbi records
// under the same VPERMQ/VPERMPD name prefixes as the variable forms above.
6215defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6216                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6217                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6218defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6219                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6220                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6221
6222//===----------------------------------------------------------------------===//
6223// AVX-512 - VPERMIL
6224//===----------------------------------------------------------------------===//
6225
// Defines the rr, rm and rmb forms of a permute whose control operand has a
// different vector type from the data operand.
//   OpcVar    - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node matched as (OpNode data, control).
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - type info for the data source ($src1) and the destination.
//   Ctrl      - type info for the control operand ($src2): its register class,
//               memory operand and load fragments are used in the patterns.
// In the rmb form the operand declaration and broadcast suffix come from `_`
// (_.ScalarMemOp, _.BroadcastStr) while the pattern uses Ctrl.BroadcastLdFrag.
6226multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6227                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6228                             X86VectorVTInfo Ctrl> {
6229  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6230                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6231                  "$src2, $src1", "$src1, $src2",
6232                  (_.VT (OpNode _.RC:$src1,
6233                               (Ctrl.VT Ctrl.RC:$src2)))>,
6234                  T8PD, EVEX_4V, Sched<[sched]>;
6235  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6236                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6237                  "$src2, $src1", "$src1, $src2",
6238                  (_.VT (OpNode
6239                           _.RC:$src1,
6240                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6241                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6242                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6243  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6244                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6245                   "${src2}"#_.BroadcastStr#", $src1",
6246                   "$src1, ${src2}"#_.BroadcastStr,
6247                   (_.VT (OpNode
6248                            _.RC:$src1,
6249                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6250                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6251                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6252}
6253
// Instantiates avx512_permil_vec with the X86VPermilpv node for the three
// vector widths, pairing each width of the data type info `_` with the same
// width of the control type info `Ctrl`.
//   Z requires HasAVX512; Z128 and Z256 require HasAVX512 and HasVLX.
6254multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6255                                    X86SchedWriteWidths sched,
6256                                    AVX512VLVectorVTInfo _,
6257                                    AVX512VLVectorVTInfo Ctrl> {
6258  let Predicates = [HasAVX512] in {
6259    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6260                                  _.info512, Ctrl.info512>, EVEX_V512;
6261  }
6262  let Predicates = [HasAVX512, HasVLX] in {
6263    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6264                                  _.info128, Ctrl.info128>, EVEX_V128;
6265    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6266                                  _.info256, Ctrl.info256>, EVEX_V256;
6267  }
6268}
6269
// Defines both flavours of VPERMIL under the same NAME prefix:
//   - variable-control forms (opcode OpcVar, node X86VPermilpv, scheduling
//     SchedWriteFVarShuffle) via avx512_permil_vec_common;
//   - immediate-control forms (opcode OpcImm, node X86VPermilpi, scheduling
//     SchedWriteFShuffle) via avx512_shift_rmi_sizes.
// `_` is the data type info and `Ctrl` the control-vector type info used by
// the variable forms only.
6270multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6271                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6272  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6273                                      _, Ctrl>;
6274  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6275                                    X86VPermilpi, SchedWriteFShuffle, _>,
6276                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6277}
6278
// VPERMILPS/VPERMILPD: the arguments are the immediate-form opcode followed by
// the variable-form opcode, then the data type info and the integer type info
// used for the control vector.  The execution domain is set explicitly per
// family; VPERMILPD is additionally tagged VEX_W1X.
6279let ExeDomain = SSEPackedSingle in
6280defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6281                               avx512vl_i32_info>;
6282let ExeDomain = SSEPackedDouble in
6283defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6284                               avx512vl_i64_info>, VEX_W1X;
6285
6286//===----------------------------------------------------------------------===//
6287// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6288//===----------------------------------------------------------------------===//
6289
// Immediate-controlled shuffles. All three share opcode 0x70 and the
// MRMSrcReg/MRMSrcMem forms; they differ in the selection node and in the
// *Ii8Base class that is mixed in.
// VPSHUFD: i32 elements at all widths (avx512vl_i32_info), X86PShufd.
6290defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6291                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6292                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
// Record prefix VPSHUFH carries the mnemonic "vpshufhw" (X86PShufhw).
6293defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6294                                  X86PShufhw, SchedWriteShuffle>,
6295                                  EVEX, AVX512XSIi8Base;
// Record prefix VPSHUFL carries the mnemonic "vpshuflw" (X86PShuflw).
6296defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6297                                  X86PShuflw, SchedWriteShuffle>,
6298                                  EVEX, AVX512XDIi8Base;
6299
6300//===----------------------------------------------------------------------===//
6301// AVX-512 - VPSHUFB
6302//===----------------------------------------------------------------------===//
6303
// Expands avx512_var_shift for byte vectors at all three widths.
//   opc       - opcode byte forwarded to avx512_var_shift.
//   OpcodeStr - mnemonic.
//   OpNode    - selection node forwarded to avx512_var_shift.
//   sched     - per-width scheduling info.
// The 512-bit form (v64i8_info) requires HasBWI; the 256- and 128-bit forms
// (v32i8x_info / v16i8x_info) require HasVLX and HasBWI.
6304multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6305                               X86SchedWriteWidths sched> {
6306  let Predicates = [HasBWI] in
6307  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6308                              EVEX_V512;
6309
6310  let Predicates = [HasVLX, HasBWI] in {
6311  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6312                              EVEX_V256;
6313  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6314                              EVEX_V128;
6315  }
6316}
6317
// VPSHUFB: opcode 0x00, selected from X86pshufb, scheduled with
// SchedWriteVarShuffle; VEX_WIG is applied to every width.
6318defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6319                                  SchedWriteVarShuffle>, VEX_WIG;
6320
6321//===----------------------------------------------------------------------===//
6322// Move Low to High and High to Low packed FP Instructions
6323//===----------------------------------------------------------------------===//
6324
// Register-register VMOVLHPS (opcode 0x16) on VR128X, selected from
// X86Movlhps on v4f32.
6325def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6326          (ins VR128X:$src1, VR128X:$src2),
6327          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6328          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6329          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// Register-register VMOVHLPS (opcode 0x12), selected from X86Movhlps on
// v4f32. Unlike VMOVLHPSZrr it is flagged isCommutable and NotMemoryFoldable.
6330let isCommutable = 1 in
6331def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6332          (ins VR128X:$src1, VR128X:$src2),
6333          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6334          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6335          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6336
6337//===----------------------------------------------------------------------===//
6338// VMOVHPS/PD VMOVLPS Instructions
6339// All patterns were taken from the SSE implementation.
6340//===----------------------------------------------------------------------===//
6341
// Defines the load form `rm` of a VMOV{H,L}P{S,D} instruction.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - node combining $src1 with the loaded value; callers pass
//               null_frag when no pattern should be attached.
//   _         - type info; supplies the register class, VT and ExeDomain.
// The memory operand is always f64mem. In the pattern the loaded f64 is put
// into a v2f64 with scalar_to_vector and then bitconverted to _.VT.
6342multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6343                                  SDPatternOperator OpNode,
6344                                  X86VectorVTInfo _> {
  // mayLoad is set explicitly here rather than left to the pattern.
6345  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6346  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6347                  (ins _.RC:$src1, f64mem:$src2),
6348                  !strconcat(OpcodeStr,
6349                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6350                  [(set _.RC:$dst,
6351                     (OpNode _.RC:$src1,
6352                       (_.VT (bitconvert
6353                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6354                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6355}
6356
6357// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6358// SSE1. And MOVLPS pattern is even more complex.
// The PS records therefore pass null_frag; the PD records select from
// X86Unpckl (VMOVHPD, opcode 0x16) and X86Movsd (VMOVLPD, opcode 0x12).
// PS records use EVEX_CD8<32, CD8VT2>; PD records use EVEX_CD8<64, CD8VT1>
// together with VEX_W.
6359defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6360                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6361defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6362                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6363defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6364                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6365defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6366                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6367
// Extra selection patterns that fold an X86vzload64 of the second operand
// into the load forms defined above, in addition to the
// scalar_to_vector(loadf64) shape matched by avx512_mov_hilo_packed itself.
6367let Predicates = [HasAVX512] in {
6368  // VMOVHPD patterns
6369  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6370            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6371
6372  // VMOVLPD patterns
6373  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6374            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6375}
6377
// Store forms of VMOV{H,L}P{S,D}; all four write an f64mem destination from
// a VR128X source and are scheduled as WriteFStore.
// The PS records have empty pattern lists, so mayStore/hasSideEffects are
// given explicitly for them; the PD records carry store patterns instead.
6377let SchedRW = [WriteFStore] in {
6378let mayStore = 1, hasSideEffects = 0 in
6379def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6380                       (ins f64mem:$dst, VR128X:$src),
6381                       "vmovhps\t{$src, $dst|$dst, $src}",
6382                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// VMOVHPD store: matches a store of element 0 of X86Unpckh($src, $src).
6383def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6384                       (ins f64mem:$dst, VR128X:$src),
6385                       "vmovhpd\t{$src, $dst|$dst, $src}",
6386                       [(store (f64 (extractelt
6387                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6388                                     (iPTR 0))), addr:$dst)]>,
6389                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6390let mayStore = 1, hasSideEffects = 0 in
6391def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6392                       (ins f64mem:$dst, VR128X:$src),
6393                       "vmovlps\t{$src, $dst|$dst, $src}",
6394                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// VMOVLPD store: matches a store of element 0 of $src itself.
6395def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6396                       (ins f64mem:$dst, VR128X:$src),
6397                       "vmovlpd\t{$src, $dst|$dst, $src}",
6398                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6399                                     (iPTR 0))), addr:$dst)]>,
6400                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6401} // SchedRW
6403
// Additional way to select VMOVHPDZ128mr: a store of element 0 of
// X86VPermilpi($src, 1) is mapped to the same instruction as the
// X86Unpckh-based pattern attached to the record itself.
6403let Predicates = [HasAVX512] in {
6404  // VMOVHPD patterns
6405  def : Pat<(store (f64 (extractelt
6406                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6407                           (iPTR 0))), addr:$dst),
6408           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6409}
6411//===----------------------------------------------------------------------===//
6412// FMA - Fused Multiply Operations
6413//
6414
// Packed FMA, "213" operand form: register (r), full-vector memory (m) and
// broadcast memory (mb) variants, all built on AVX512_maskable_fma.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic.
//   OpNode     - node used for the unmasked pattern.
//   MaskOpNode - node used for the masked pattern.
//   sched      - scheduling class; .Folded/.ReadAfterFold for memory forms.
//   _          - vector type info.
//   Suff       - not referenced inside this multiclass.
// $src1 is tied to $dst. Every pattern passes the operands to the node as
// ($src2, $src1, $src3), with $src3 being the register/load/broadcast.
// NOTE(review): the trailing bit arguments (1, 1 / 1, 0) are defined by
// AVX512_maskable_fma, which is not visible here - presumably commutability
// flags; confirm against that multiclass.
6414multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6415                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6416                               X86VectorVTInfo _, string Suff> {
6417  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6418      Uses = [MXCSR], mayRaiseFPException = 1 in {
6419  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6420          (ins _.RC:$src2, _.RC:$src3),
6421          OpcodeStr, "$src3, $src2", "$src2, $src3",
6422          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6423          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6424          AVX512FMA3Base, Sched<[sched]>;
6425
6426  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6427          (ins _.RC:$src2, _.MemOp:$src3),
6428          OpcodeStr, "$src3, $src2", "$src2, $src3",
6429          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6430          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6431          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6432
  // NOTE(review): unlike the r/m forms above and the mb forms of the 231 and
  // 132 multiclasses, the two patterns below are not wrapped in an outer
  // (_.VT ...) and build the asm strings with !strconcat instead of `#`.
6433  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6434            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6435            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6436            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6437            (OpNode _.RC:$src2,
6438             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6439            (MaskOpNode _.RC:$src2,
6440             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6441            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6442  }
6443}
6445
// Packed FMA, "213" operand form with an explicit rounding-control operand
// (rb). The instruction takes an extra AVX512RC:$rc input, which the pattern
// passes to OpNode as (i32 timm:$rc) after ($src2, $src1, $src3).
// The same OpNode is used for both the unmasked and the masked pattern.
// $src1 is tied to $dst; EVEX_B and EVEX_RC are applied.
// Suff is not referenced inside this multiclass.
6445multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6446                                 X86FoldableSchedWrite sched,
6447                                 X86VectorVTInfo _, string Suff> {
  // Note: Uses = [MXCSR] is set, but mayRaiseFPException is not set here,
  // unlike in avx512_fma3p_213_rm.
6448  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6449      Uses = [MXCSR] in
6450  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6451          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6452          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6453          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6454          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6455          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6456}
6458
// Expands the "213" packed FMA forms for every vector width.
//   OpNode/MaskOpNode - nodes for the unmasked/masked r, m, mb patterns.
//   OpNodeRnd         - node for the rounding-control (rb) pattern.
//   sched             - per-width scheduling info.
//   _                 - per-width vector type info.
//   Suff              - forwarded unchanged to the inner multiclasses.
// Only the 512-bit form (Z) also instantiates avx512_fma3_213_round; the
// 256- and 128-bit forms require HasVLX and get no rb variant.
6458multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6459                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6460                                   X86SchedWriteWidths sched,
6461                                   AVX512VLVectorVTInfo _, string Suff> {
6462  let Predicates = [HasAVX512] in {
6463    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6464                                      sched.ZMM, _.info512, Suff>,
6465                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6466                                        _.info512, Suff>,
6467                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6468  }
6469  let Predicates = [HasVLX, HasAVX512] in {
6470    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6471                                    sched.YMM, _.info256, Suff>,
6472                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6473    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6474                                    sched.XMM, _.info128, Suff>,
6475                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6476  }
6477}
6479
// Instantiates the "213" packed FMA family for both FP element types:
//   PS - mnemonic suffix "ps", avx512vl_f32_info;
//   PD - mnemonic suffix "pd", avx512vl_f64_info, plus VEX_W.
// Both use SchedWriteFMA.
6479multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6480                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6481    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6482                                      OpNodeRnd, SchedWriteFMA,
6483                                      avx512vl_f32_info, "PS">;
6484    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6485                                      OpNodeRnd, SchedWriteFMA,
6486                                      avx512vl_f64_info, "PD">, VEX_W;
6487}
6489
// "213" packed FMA instructions. Template arguments are, in order: opcode,
// mnemonic stem, unmasked node, masked node, rounding node.
// VFMADD/VFMSUB/VFNMADD/VFNMSUB use an X86any_* node for the unmasked
// pattern; VFMADDSUB/VFMSUBADD pass the same node for both unmasked and
// masked patterns.
6489defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
6490                                       X86Fmadd, X86FmaddRnd>;
6491defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6492                                       X86Fmsub, X86FmsubRnd>;
6493defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6494                                       X86Fmaddsub, X86FmaddsubRnd>;
6495defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6496                                       X86Fmsubadd, X86FmsubaddRnd>;
6497defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6498                                       X86Fnmadd, X86FnmaddRnd>;
6499defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6500                                       X86Fnmsub, X86FnmsubRnd>;
6502
6503
// Packed FMA, "231" operand form: register (r), full-vector memory (m) and
// broadcast memory (mb) variants, all built on AVX512_maskable_fma.
// Parameters mirror avx512_fma3p_213_rm; Suff is not referenced here.
// $src1 is tied to $dst. Patterns pass the operands to the node as
// ($src2, $src3, $src1), with $src3 being the register/load/broadcast.
// The r form passes null_frag as its unmasked pattern, so only its masked
// pattern is supplied from here; m and mb supply both.
6503multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6504                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6505                               X86VectorVTInfo _, string Suff> {
6506  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6507      Uses = [MXCSR], mayRaiseFPException = 1 in {
6508  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6509          (ins _.RC:$src2, _.RC:$src3),
6510          OpcodeStr, "$src3, $src2", "$src2, $src3",
6511          (null_frag),
6512          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6513          AVX512FMA3Base, Sched<[sched]>;
6514
6515  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6516          (ins _.RC:$src2, _.MemOp:$src3),
6517          OpcodeStr, "$src3, $src2", "$src2, $src3",
6518          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6519          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6520          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6521
6522  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6523         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6524         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6525         "$src2, ${src3}"#_.BroadcastStr,
6526         (_.VT (OpNode _.RC:$src2,
6527                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6528                      _.RC:$src1)),
6529         (_.VT (MaskOpNode _.RC:$src2,
6530                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6531                           _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6532         Sched<[sched.Folded, sched.ReadAfterFold]>;
6533  }
6534}
6536
// Packed FMA, "231" operand form with an explicit rounding-control operand
// (rb). The unmasked pattern is null_frag; the masked pattern passes
// ($src2, $src3, $src1, (i32 timm:$rc)) to OpNode.
// $src1 is tied to $dst; EVEX_B and EVEX_RC are applied.
// Suff is not referenced inside this multiclass.
6536multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6537                                 X86FoldableSchedWrite sched,
6538                                 X86VectorVTInfo _, string Suff> {
6539  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6540      Uses = [MXCSR] in
6541  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6542          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6543          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6544          (null_frag),
6545          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6546          1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6547}
6549
// Expands the "231" packed FMA forms for every vector width.
//   OpNode/MaskOpNode - nodes for the unmasked/masked r, m, mb patterns.
//   OpNodeRnd         - node for the rounding-control (rb) pattern.
//   sched             - per-width scheduling info.
//   _                 - per-width vector type info.
//   Suff              - forwarded unchanged to the inner multiclasses.
// Only the 512-bit form (Z) also instantiates avx512_fma3_231_round; the
// 256- and 128-bit forms require HasVLX and get no rb variant.
6549multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6550                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6551                                   X86SchedWriteWidths sched,
6552                                   AVX512VLVectorVTInfo _, string Suff> {
6553  let Predicates = [HasAVX512] in {
6554    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6555                                      sched.ZMM, _.info512, Suff>,
6556                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6557                                        _.info512, Suff>,
6558                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6559  }
6560  let Predicates = [HasVLX, HasAVX512] in {
6561    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6562                                    sched.YMM, _.info256, Suff>,
6563                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6564    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6565                                    sched.XMM, _.info128, Suff>,
6566                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6567  }
6568}
6570
// Instantiates the "231" packed FMA family for both FP element types:
//   PS - mnemonic suffix "ps", avx512vl_f32_info;
//   PD - mnemonic suffix "pd", avx512vl_f64_info, plus VEX_W.
// Both use SchedWriteFMA.
6570multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6571                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6572    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6573                                      OpNodeRnd, SchedWriteFMA,
6574                                      avx512vl_f32_info, "PS">;
6575    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6576                                      OpNodeRnd, SchedWriteFMA,
6577                                      avx512vl_f64_info, "PD">, VEX_W;
6578}
6580
// "231" packed FMA instructions. Template arguments are, in order: opcode,
// mnemonic stem, unmasked node, masked node, rounding node.
// Each opcode here is 0x10 above the matching "213" opcode (e.g. 0xB8 vs
// 0xA8 for vfmadd).
6580defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
6581                                       X86Fmadd, X86FmaddRnd>;
6582defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6583                                       X86Fmsub, X86FmsubRnd>;
6584defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6585                                       X86Fmaddsub, X86FmaddsubRnd>;
6586defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6587                                       X86Fmsubadd, X86FmsubaddRnd>;
6588defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6589                                       X86Fnmadd, X86FnmaddRnd>;
6590defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6591                                       X86Fnmsub, X86FnmsubRnd>;
6593
// Packed FMA, "132" operand form: register (r), full-vector memory (m) and
// broadcast memory (mb) variants, all built on AVX512_maskable_fma.
// Parameters mirror avx512_fma3p_213_rm; Suff is not referenced here.
// $src1 is tied to $dst. The r form passes null_frag as its unmasked pattern
// and ($src1, $src3, $src2) to MaskOpNode; the m and mb forms put the loaded
// operand first, i.e. ($src3, $src1, $src2).
6593multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6594                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6595                               X86VectorVTInfo _, string Suff> {
6596  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6597      Uses = [MXCSR], mayRaiseFPException = 1 in {
6598  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6599          (ins _.RC:$src2, _.RC:$src3),
6600          OpcodeStr, "$src3, $src2", "$src2, $src3",
6601          (null_frag),
6602          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6603          AVX512FMA3Base, Sched<[sched]>;
6604
6605  // Pattern is 312 order so that the load is in a different place from the
6606  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6607  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6608          (ins _.RC:$src2, _.MemOp:$src3),
6609          OpcodeStr, "$src3, $src2", "$src2, $src3",
6610          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6611          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6612          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6613
6614  // Pattern is 312 order so that the load is in a different place from the
6615  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6616  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6617         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6618         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6619         "$src2, ${src3}"#_.BroadcastStr,
6620         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6621                       _.RC:$src1, _.RC:$src2)),
6622         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6623                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6624         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6625  }
6626}
6628
// Packed FMA, "132" operand form with an explicit rounding-control operand
// (rb). The unmasked pattern is null_frag; the masked pattern passes
// ($src1, $src3, $src2, (i32 timm:$rc)) to OpNode.
// $src1 is tied to $dst; EVEX_B and EVEX_RC are applied.
// Suff is not referenced inside this multiclass.
6628multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6629                                 X86FoldableSchedWrite sched,
6630                                 X86VectorVTInfo _, string Suff> {
6631  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6632      Uses = [MXCSR] in
6633  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6634          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6635          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6636          (null_frag),
6637          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6638          1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6639}
6641
// Expands the "132" packed FMA forms for every vector width.
//   OpNode/MaskOpNode - nodes for the unmasked/masked r, m, mb patterns.
//   OpNodeRnd         - node for the rounding-control (rb) pattern.
//   sched             - per-width scheduling info.
//   _                 - per-width vector type info.
//   Suff              - forwarded unchanged to the inner multiclasses.
// Only the 512-bit form (Z) also instantiates avx512_fma3_132_round; the
// 256- and 128-bit forms require HasVLX and get no rb variant.
6641multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6642                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6643                                   X86SchedWriteWidths sched,
6644                                   AVX512VLVectorVTInfo _, string Suff> {
6645  let Predicates = [HasAVX512] in {
6646    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6647                                      sched.ZMM, _.info512, Suff>,
6648                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6649                                        _.info512, Suff>,
6650                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6651  }
6652  let Predicates = [HasVLX, HasAVX512] in {
6653    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6654                                    sched.YMM, _.info256, Suff>,
6655                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6656    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6657                                    sched.XMM, _.info128, Suff>,
6658                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6659  }
6660}
6662
// Instantiates the "132" packed FMA family for both FP element types:
//   PS - mnemonic suffix "ps", avx512vl_f32_info;
//   PD - mnemonic suffix "pd", avx512vl_f64_info, plus VEX_W.
// Both use SchedWriteFMA.
6662multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6663                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6664    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6665                                      OpNodeRnd, SchedWriteFMA,
6666                                      avx512vl_f32_info, "PS">;
6667    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6668                                      OpNodeRnd, SchedWriteFMA,
6669                                      avx512vl_f64_info, "PD">, VEX_W;
6670}
6672
// "132" packed FMA instructions. Template arguments are, in order: opcode,
// mnemonic stem, unmasked node, masked node, rounding node.
// Each opcode here is 0x10 below the matching "213" opcode (e.g. 0x98 vs
// 0xA8 for vfmadd).
6672defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
6673                                       X86Fmadd, X86FmaddRnd>;
6674defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6675                                       X86Fmsub, X86FmsubRnd>;
6676defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6677                                       X86Fmaddsub, X86FmaddsubRnd>;
6678defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6679                                       X86Fmsubadd, X86FmsubaddRnd>;
6680defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6681                                       X86Fnmadd, X86FnmaddRnd>;
6682defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6683                                       X86Fnmsub, X86FnmsubRnd>;
6685
6686// Scalar FMA
// Defines the six records that make up one scalar FMA operand form:
//   r_Int / m_Int / rb_Int - forms on _.RC built with
//       AVX512_maskable_3src_scalar; all are given null_frag, so no
//       selection pattern is attached from here.
//   r / m / rb - isCodeGenOnly, isCommutable forms on _.FRC whose patterns
//       come from the RHS_r / RHS_m / RHS_b dags supplied by the caller.
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic.
//   _              - scalar type info (RC, FRC, memory operands).
//   RHS_r/RHS_m/RHS_b - complete pattern dags for the r / m / rb records.
//   MaskOnlyReg    - when set, r and rb get an empty pattern list (RHS_r and
//                    RHS_b are dropped); m always keeps RHS_m.
// $src1 is tied to $dst for every record.
6687multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6688                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6689let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6690  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6691          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6692          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6693          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6694
  // No pattern is attached, so mayLoad has to be stated explicitly.
6695  let mayLoad = 1 in
6696  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6697          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6698          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6699          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6700
  // Rounding-control form: uses `Uses = [MXCSR]` where the forms above use
  // the SIMD_EXC mixin.
6701  let Uses = [MXCSR] in
6702  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6703         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6704         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6705         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6706
6707  let isCodeGenOnly = 1, isCommutable = 1 in {
6708    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6709                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6710                     !strconcat(OpcodeStr,
6711                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6712                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6713    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6714                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6715                    !strconcat(OpcodeStr,
6716                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6717                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6718
6719    let Uses = [MXCSR] in
6720    def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6721                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6722                     !strconcat(OpcodeStr,
6723                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6724                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6725                     Sched<[SchedWriteFMA.Scl]>;
6726  }// isCodeGenOnly = 1
6727}// Constraints = "$src1 = $dst"
6728}
6729
// Defines the 213, 231 and 132 scalar FMA forms for one element type by
// expanding avx512_fma3s_common three times, producing records named
// NAME # {213,231,132} # SUFF # Z.
//   opc213/opc231/opc132 - opcode byte for each operand form.
//   OpcodeStr            - mnemonic stem; the form digits and _.Suffix are
//                          appended to it.
//   OpNode / OpNodeRnd   - nodes for the plain and rounding-control patterns.
//   _                    - scalar type info.
//   SUFF                 - suffix used in the record names.
// Each expansion passes three `set` patterns (register, load, rounding) and
// a MaskOnlyReg bit: 0 for the 213 form, 1 for the 231 and 132 forms, so only
// the 213 form keeps its register and rounding patterns on the FRC records.
6729multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6730                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6731                            X86VectorVTInfo _, string SUFF> {
6732  let ExeDomain = _.ExeDomain in {
6733  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6734                // Operands for intrinsic are in 123 order to preserve passthru
6735                // semantics.
                // NOTE(review): the patterns below pass the operands as
                // ($src2, $src1, $src3); the "123 order" wording above may be
                // stale - confirm.
6736                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6737                         _.FRC:$src3))),
6738                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6739                         (_.ScalarLdFrag addr:$src3)))),
6740                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6741                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
6742
6743  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6744                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6745                                          _.FRC:$src1))),
6746                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6747                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6748                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6749                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
6750
6751  // One pattern is 312 order so that the load is in a different place from the
6752  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6753  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6754                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6755                         _.FRC:$src2))),
6756                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6757                                 _.FRC:$src1, _.FRC:$src2))),
6758                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6759                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
6760  }
6761}
6763
// Instantiates avx512_fma3s_all for both scalar FP types under HasAVX512:
//   f32x_info with record suffix "SS" and EVEX_CD8<32, CD8VT1>;
//   f64x_info with record suffix "SD", EVEX_CD8<64, CD8VT1> and VEX_W.
// Both are marked VEX_LIG.
6763multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6764                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6765  let Predicates = [HasAVX512] in {
6766    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6767                                 OpNodeRnd, f32x_info, "SS">,
6768                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6769    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6770                                 OpNodeRnd, f64x_info, "SD">,
6771                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6772  }
6773}
6775
// Scalar FMA instructions. Arguments are, in order: the 213, 231 and 132
// opcodes, the mnemonic stem, the plain node and the rounding-control node.
// Each scalar opcode is one above the packed opcode of the same operation
// and form (e.g. 0xA9 here vs 0xA8 for the packed vfmadd213).
6775defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
6776defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6777defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6778defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6780
6781multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
6782                                      SDNode RndOp, string Prefix,
6783                                      string Suffix, SDNode Move,
6784                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
6785  let Predicates = [HasAVX512] in {
6786    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6787                (Op _.FRC:$src2,
6788                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6789                    _.FRC:$src3))))),
6790              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6791               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6792               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6793
6794    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6795                (Op _.FRC:$src2, _.FRC:$src3,
6796                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6797              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6798               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6799               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6800
6801    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6802                (Op _.FRC:$src2,
6803                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6804                    (_.ScalarLdFrag addr:$src3)))))),
6805              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6806               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6807               addr:$src3)>;
6808
6809    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6810                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6811                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6812              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6813               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6814               addr:$src3)>;
6815
6816    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6817                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6818                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6819              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6820               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6821               addr:$src3)>;
6822
6823    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6824               (X86selects_mask VK1WM:$mask,
6825                (MaskedOp _.FRC:$src2,
6826                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6827                    _.FRC:$src3),
6828                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6829              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6830               VR128X:$src1, VK1WM:$mask,
6831               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6832               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6833
6834    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6835               (X86selects_mask VK1WM:$mask,
6836                (MaskedOp _.FRC:$src2,
6837                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6838                    (_.ScalarLdFrag addr:$src3)),
6839                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6840              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6841               VR128X:$src1, VK1WM:$mask,
6842               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6843
6844    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6845               (X86selects_mask VK1WM:$mask,
6846                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6847                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6848                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6849              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6850               VR128X:$src1, VK1WM:$mask,
6851               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6852
6853    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6854               (X86selects_mask VK1WM:$mask,
6855                (MaskedOp _.FRC:$src2, _.FRC:$src3,
6856                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6857                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6858              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6859               VR128X:$src1, VK1WM:$mask,
6860               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6861               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6862
6863    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6864               (X86selects_mask VK1WM:$mask,
6865                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6866                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6867                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6868              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6869               VR128X:$src1, VK1WM:$mask,
6870               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6871
6872    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6873               (X86selects_mask VK1WM:$mask,
6874                (MaskedOp _.FRC:$src2,
6875                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6876                          _.FRC:$src3),
6877                (_.EltVT ZeroFP)))))),
6878              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6879               VR128X:$src1, VK1WM:$mask,
6880               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6881               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6882
6883    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6884               (X86selects_mask VK1WM:$mask,
6885                (MaskedOp _.FRC:$src2, _.FRC:$src3,
6886                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6887                (_.EltVT ZeroFP)))))),
6888              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6889               VR128X:$src1, VK1WM:$mask,
6890               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6891               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6892
6893    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6894               (X86selects_mask VK1WM:$mask,
6895                (MaskedOp _.FRC:$src2,
6896                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6897                          (_.ScalarLdFrag addr:$src3)),
6898                (_.EltVT ZeroFP)))))),
6899              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6900               VR128X:$src1, VK1WM:$mask,
6901               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6902
6903    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6904               (X86selects_mask VK1WM:$mask,
6905                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6906                          _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6907                (_.EltVT ZeroFP)))))),
6908              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6909               VR128X:$src1, VK1WM:$mask,
6910               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6911
6912    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6913               (X86selects_mask VK1WM:$mask,
6914                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6915                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6916                (_.EltVT ZeroFP)))))),
6917              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6918               VR128X:$src1, VK1WM:$mask,
6919               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6920
6921    // Patterns with rounding mode.
6922    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6923                (RndOp _.FRC:$src2,
6924                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6925                       _.FRC:$src3, (i32 timm:$rc)))))),
6926              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6927               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6928               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6929
6930    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6931                (RndOp _.FRC:$src2, _.FRC:$src3,
6932                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6933                       (i32 timm:$rc)))))),
6934              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6935               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6936               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6937
6938    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6939               (X86selects_mask VK1WM:$mask,
6940                (RndOp _.FRC:$src2,
6941                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                       _.FRC:$src3, (i32 timm:$rc)),
6943                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6944              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6945               VR128X:$src1, VK1WM:$mask,
6946               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6947               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6948
6949    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6950               (X86selects_mask VK1WM:$mask,
6951                (RndOp _.FRC:$src2, _.FRC:$src3,
6952                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6953                       (i32 timm:$rc)),
6954                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6955              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6956               VR128X:$src1, VK1WM:$mask,
6957               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6958               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6959
6960    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6961               (X86selects_mask VK1WM:$mask,
6962                (RndOp _.FRC:$src2,
6963                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6964                       _.FRC:$src3, (i32 timm:$rc)),
6965                (_.EltVT ZeroFP)))))),
6966              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6967               VR128X:$src1, VK1WM:$mask,
6968               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6969               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6970
6971    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6972               (X86selects_mask VK1WM:$mask,
6973                (RndOp _.FRC:$src2, _.FRC:$src3,
6974                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6975                       (i32 timm:$rc)),
6976                (_.EltVT ZeroFP)))))),
6977              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6978               VR128X:$src1, VK1WM:$mask,
6979               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6980               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6981  }
6982}
6983
// Instantiate the scalar FMA selection patterns once per FMA flavour
// (VFMADD, VFMSUB, VFNMADD, VFNMSUB) and per element type.
// NOTE(review): the multiclass header is outside this chunk; judging by the
// names used in its body the arguments presumably bind, in order, to
// Op, MaskedOp, RndOp, Prefix, Suffix, Move, _ (vector type info) and ZeroFP.
// Confirm against the declaration.
//
// Single-precision: suffix "SS", X86Movss, v4f32x_info, fp32imm0.
6984defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6985                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6986defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6987                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6988defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6989                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6990defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6991                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
6992
// Double-precision: suffix "SD", X86Movsd, v2f64x_info, fp64imm0.
6993defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6994                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
6995defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6996                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
6997defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6998                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
6999defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7000                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7001
7002//===----------------------------------------------------------------------===//
7003// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7004//===----------------------------------------------------------------------===//
// avx512_pmadd52_rm: the three operand forms of one 52-bit multiply-add
// instruction at a single vector width.
//   opc       - opcode byte forwarded to AVX512_maskable_3src.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode matched by the selection pattern.
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - X86VectorVTInfo supplying RC, VT, MemOp, LdFrag, etc.
// Every form is emitted through AVX512_maskable_3src, and the enclosing
// Constraints ties the accumulator $src1 to $dst.
7005let Constraints = "$src1 = $dst" in {
7006multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7007                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7008  // NOTE: The SDNode have the multiply operands first with the add last.
7009  // This enables commuted load patterns to be autogenerated by tablegen.
7010  let ExeDomain = _.ExeDomain in {
  // Register form: both multiply operands come from registers.
  // NOTE(review): the trailing "1, 1" are extra AVX512_maskable_3src
  // arguments passed only here; their meaning is defined by that multiclass.
7011  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7012          (ins _.RC:$src2, _.RC:$src3),
7013          OpcodeStr, "$src3, $src2", "$src2, $src3",
7014          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7015         AVX512FMA3Base, Sched<[sched]>;
7016
  // Memory form: $src3 is read through _.LdFrag from a _.MemOp operand.
7017  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7018          (ins _.RC:$src2, _.MemOp:$src3),
7019          OpcodeStr, "$src3, $src2", "$src2, $src3",
7020          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7021          AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
7022
  // Broadcast form: $src3 is a _.ScalarMemOp read through _.BroadcastLdFrag;
  // _.BroadcastStr is appended to the memory operand in both asm strings and
  // the record additionally carries EVEX_B.
7023  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7024            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7025            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7026            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7027            (OpNode _.RC:$src2,
7028                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7029                    _.RC:$src1)>,
7030            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7031  }
7032}
7033} // Constraints = "$src1 = $dst"
7034
// avx512_pmadd52_common: instantiate avx512_pmadd52_rm at all three vector
// widths described by the AVX512VLVectorVTInfo argument.
//   - Z    (512-bit, _.info512) is guarded by HasIFMA.
//   - Z256 / Z128 (_.info256 / _.info128) additionally require HasVLX.
// Each width picks the matching member of the X86SchedWriteWidths bundle
// (ZMM / YMM / XMM) and derives its EVEX_CD8 element size from the type info.
7035multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7036                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7037  let Predicates = [HasIFMA] in {
7038    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7039                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7040  }
7041  let Predicates = [HasVLX, HasIFMA] in {
7042    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7043                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7044    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7045                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7046  }
7047}
7048
// The two IFMA instructions: opcode 0xb4 pairs with the x86vpmadd52l node and
// opcode 0xb5 with x86vpmadd52h.  Both operate on avx512vl_i64_info, use the
// SchedWriteVecIMul scheduling bundle and carry VEX_W.
7049defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7050                                         SchedWriteVecIMul, avx512vl_i64_info>,
7051                                         VEX_W;
7052defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7053                                         SchedWriteVecIMul, avx512vl_i64_info>,
7054                                         VEX_W;
7055
7056//===----------------------------------------------------------------------===//
7057// AVX-512  Scalar convert from signed/unsigned integer to float/double
7058//===----------------------------------------------------------------------===//
7059
// avx512_vcvtsi: scalar integer -> floating-point convert, non-rounding forms.
//   opc        - opcode byte.
//   OpNode     - pattern operator used by the *_Int forms (callers may pass
//                null_frag).
//   sched      - scheduling class; memory forms use sched.Folded and
//                sched.ReadAfterFold.
//   SrcRC      - integer source register class.
//   DstVT      - destination type info (supplies FRC, RC, VT, ExeDomain).
//   x86memop   - memory operand type for the load forms.
//   ld_frag    - load fragment matched by rm_Int.
//   asm        - mnemonic without the leading "v" used by the alias below.
//   mem        - size suffix; spliced into the load-form asm strings and
//                into the alias mnemonic.
//   _Uses      - value for the Uses field of the four defs (default [MXCSR]).
//   _mayRaiseFPException - value for mayRaiseFPException (default 1).
// Defines:
//   rr / rm         - isCodeGenOnly, pattern-less forms on DstVT.FRC.
//   rr_Int / rm_Int - forms on DstVT.RC that carry the OpNode patterns.
// plus an "att"-only InstAlias that maps "v" # asm # mem onto rr_Int.
7060multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7061                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7062                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7063                    string mem, list<Register> _Uses = [MXCSR],
7064                    bit _mayRaiseFPException = 1> {
7065let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7066    mayRaiseFPException = _mayRaiseFPException in {
  // Scalar-register forms.  They have empty pattern lists, so any selection
  // of them has to come from separate Pat records.
7067  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7068    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7069              (ins DstVT.FRC:$src1, SrcRC:$src),
7070              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7071              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    // No pattern to infer it from, so mayLoad is set explicitly.
7072    let mayLoad = 1 in
7073      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7074              (ins DstVT.FRC:$src1, x86memop:$src),
7075              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7076              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7077  } // hasSideEffects = 0
  // Vector-register forms: $src1 is passed to OpNode as a DstVT.VT value.
7078  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7079                (ins DstVT.RC:$src1, SrcRC:$src2),
7080                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7081                [(set DstVT.RC:$dst,
7082                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7083               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7084
7085  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7086                (ins DstVT.RC:$src1, x86memop:$src2),
7087                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7088                [(set DstVT.RC:$dst,
7089                      (OpNode (DstVT.VT DstVT.RC:$src1),
7090                               (ld_frag addr:$src2)))]>,
7091                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7092}
  // Alias sits outside the let block above; it only adds a spelling with the
  // explicit size suffix for the register form.
7093  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7094                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7095                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7096}
7097
// avx512_vcvtsi_round: the explicit-rounding register form of a scalar
// integer -> floating-point convert.
// Defines rrb_Int, which takes an extra AVX512RC:$rc operand, matches
// OpNode(src1, src2, (i32 timm:$rc)) and is tagged EVEX_B + EVEX_RC, plus an
// "att"-only InstAlias that maps "v" # asm # mem onto it.
// Parameters have the same roles as the like-named ones of avx512_vcvtsi.
7098multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7099                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7100                               X86VectorVTInfo DstVT, string asm,
7101                               string mem> {
  // Brace-less let: applies to the single def that follows.
7102  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7103  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7104              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7105              !strconcat(asm,
7106                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7107              [(set DstVT.RC:$dst,
7108                    (OpNode (DstVT.VT DstVT.RC:$src1),
7109                             SrcRC:$src2,
7110                             (i32 timm:$rc)))]>,
7111              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7112  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7113                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7114                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7115}
7116
// avx512_vcvtsi_common: convenience wrapper that emits, under the caller's
// NAME, both the rounding form (avx512_vcvtsi_round with OpNodeRnd) and the
// non-rounding forms (avx512_vcvtsi with OpNode), and tags the result VEX_LIG.
// avx512_vcvtsi is instantiated with its default _Uses and
// _mayRaiseFPException arguments.
7117multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7118                                X86FoldableSchedWrite sched,
7119                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7120                                X86MemOperand x86memop, PatFrag ld_frag,
7121                                string asm, string mem> {
7122  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7123              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7124                            ld_frag, asm, mem>, VEX_LIG;
7125}
7126
// Scalar integer -> FP convert instructions, their suffix-less assembler
// aliases and the scalar selection patterns, all guarded by HasAVX512.
7127let Predicates = [HasAVX512] in {
// Signed converts (opcode 0x2A).  The 64-bit-source variants add VEX_W and
// use EVEX_CD8<64, ...>.
// VCVTSI2SDZ is built from avx512_vcvtsi directly, so it gets no rrb_Int
// rounding form; it passes null_frag, an empty Uses list and
// mayRaiseFPException = 0.
// NOTE(review): presumably because i32 -> f64 is always exact - confirm.
7128defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7129                                 WriteCvtI2SS, GR32,
7130                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7131                                 XS, EVEX_CD8<32, CD8VT1>;
7132defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7133                                 WriteCvtI2SS, GR64,
7134                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7135                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7136defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7137                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7138                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7139defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7140                                 WriteCvtI2SD, GR64,
7141                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7142                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7143
// Suffix-less mnemonic with a memory operand resolves to the i32mem form
// ("att" variant only).
7144def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7145              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7146def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7147              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7148
// Scalar any_sint_to_fp from a load: select the rm form, feeding IMPLICIT_DEF
// as the $src1 operand.
7149def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7150          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7151def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7152          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7153def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7154          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7155def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7156          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7157
// Scalar any_sint_to_fp from a GPR: select the rr form the same way.
7158def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7159          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7160def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7161          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7162def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7163          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7164def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7165          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7166
// Unsigned converts (opcode 0x7B); same layout as the signed group above,
// including the avx512_vcvtsi-only VCVTUSI2SDZ.
7167defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7168                                  WriteCvtI2SS, GR32,
7169                                  v4f32x_info, i32mem, loadi32,
7170                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7171defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7172                                  WriteCvtI2SS, GR64,
7173                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7174                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7175defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7176                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7177                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7178defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7179                                  WriteCvtI2SD, GR64,
7180                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7181                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7182
// Suffix-less unsigned mnemonics with a memory operand -> i32mem form.
7183def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7184              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7185def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7186              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7187
// Scalar any_uint_to_fp from a load.
7188def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7189          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7190def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7191          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7192def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7193          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7194def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7195          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7196
// Scalar any_uint_to_fp from a GPR.
7197def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7198          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7199def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7200          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7201def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7202          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7203def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7204          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7205}
7206
7207//===----------------------------------------------------------------------===//
7208// AVX-512  Scalar convert from float/double to integer
7209//===----------------------------------------------------------------------===//
7210
// avx512_cvt_s_int_round: scalar FP -> integer convert on vector-register
// sources, with an explicit-rounding variant.
//   opc       - opcode byte.
//   SrcVT     - source type info (RC, VT, IntScalarMemOp, ScalarIntMemFrags).
//   DstVT     - destination type info (RC is the result register class).
//   OpNode    - node matched by rr_Int and rm_Int.
//   OpNodeRnd - node matched by rrb_Int together with (i32 timm:$rc).
//   sched     - scheduling class.
//   asm       - mnemonic without the leading "v" used by the aliases.
//   aliasStr  - suffix appended to the mnemonic in the aliases.
// Defines rr_Int, rrb_Int and rm_Int under HasAVX512, then one "att"-only
// InstAlias per form that accepts the mnemonic with aliasStr appended.
7211multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7212                                  X86VectorVTInfo DstVT, SDNode OpNode,
7213                                  SDNode OpNodeRnd,
7214                                  X86FoldableSchedWrite sched, string asm,
7215                                  string aliasStr> {
7216  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7217    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7218                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7219                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7220                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // Rounding-control form: sets Uses = [MXCSR] directly rather than using
    // the SIMD_EXC mix-in that the other two forms carry.
7221    let Uses = [MXCSR] in
7222    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7223                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7224                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7225                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7226                 Sched<[sched]>;
7227    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7228                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7229                [(set DstVT.RC:$dst, (OpNode
7230                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7231                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7232  } // Predicates = [HasAVX512]
7233
7234  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7235          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7236  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7237          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7238  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7239          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7240                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7241}
7242
7243// Convert float/double to signed/unsigned int 32/64
// Eight instantiations: {SS, SD} source x {signed, unsigned} x {32, 64}-bit
// destination.
//   - Signed forms use opcode 0x2D with X86cvts2si / X86cvts2siRnd;
//     unsigned forms use opcode 0x79 with X86cvts2usi / X86cvts2usiRnd.
//   - i64x_info destinations add VEX_W and the "{q}" alias suffix; i32x_info
//     destinations use "{l}".
//   - EVEX_CD8 follows the source element size (32 for SS, 64 for SD).
7244defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7245                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7246                                   XS, EVEX_CD8<32, CD8VT1>;
7247defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7248                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7249                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7250defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7251                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7252                                   XS, EVEX_CD8<32, CD8VT1>;
7253defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7254                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7255                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7256defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7257                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7258                                   XD, EVEX_CD8<64, CD8VT1>;
7259defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7260                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7261                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7262defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7263                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7264                                   XD, EVEX_CD8<64, CD8VT1>;
7265defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7266                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7267                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7268
// avx512_cvt_s: isCodeGenOnly scalar FP -> integer forms that take the source
// as a scalar FP register (SrcVT.FRC) or a scalar load (SrcVT.ScalarLdFrag)
// and match OpNode directly.
//   opc    - opcode byte.
//   asm    - full mnemonic (used as-is, no "v" is prepended here).
//   SrcVT  - source type info; DstVT.RC is the result register class.
//   OpNode - node matched by both forms.
//   sched  - scheduling class.
//   aliasStr - accepted for signature parity with the sibling multiclasses;
//              it is not referenced anywhere in this body.
// Defines rr and rm, both guarded by HasAVX512.
7269multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7270                        X86VectorVTInfo DstVT, SDNode OpNode,
7271                        X86FoldableSchedWrite sched,
7272                        string aliasStr> {
7273  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7274    let isCodeGenOnly = 1 in {
7275    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7276                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7277                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7278                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7279    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7280                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7281                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7282                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7283    }
7284  } // Predicates = [HasAVX512]
7285}
7286
// Add the codegen-only rr / rm forms to the signed convert families under
// the same defm names used for the *_Int forms.  The node is lrint for
// the i32x_info destinations and llrint for the i64x_info destinations
// (which also carry VEX_W).
7287defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7288                       lrint, WriteCvtSS2I,
7289                       "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7290defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7291                       llrint, WriteCvtSS2I,
7292                       "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7293defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7294                       lrint, WriteCvtSD2I,
7295                       "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7296defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7297                       llrint, WriteCvtSD2I,
7298                       "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7299
// lrint with an i64 result: the instantiations above attach lrint only to the
// i32 destinations, so the i64-result case is mapped explicitly onto the
// 64-bit-destination instructions here, for register and load sources.
7300let Predicates = [HasAVX512] in {
7301  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7302  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7303
7304  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7305  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7306}
7307
7308// Patterns for the vcvt{u}si2s{s,d} intrinsic sequences emitted by clang,
7309// which would otherwise produce unnecessary vmovs{s,d} instructions
// Fold "X86Movss/X86Movsd $dst, (scalar_to_vector (int-to-fp src))" into the
// corresponding *_Int convert, passing $dst as the instruction's first source
// operand.  One pattern per combination of:
//   result type   : v4f32 (X86Movss) / v2f64 (X86Movsd)
//   signedness    : any_sint_to_fp / any_uint_to_fp
//   integer width : 64-bit / 32-bit
//   source kind   : GPR (rr_Int) / load (rm_Int)
7310let Predicates = [HasAVX512] in {
// Signed -> f32.
7311def : Pat<(v4f32 (X86Movss
7312                   (v4f32 VR128X:$dst),
7313                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7314          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7315
7316def : Pat<(v4f32 (X86Movss
7317                   (v4f32 VR128X:$dst),
7318                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7319          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7320
7321def : Pat<(v4f32 (X86Movss
7322                   (v4f32 VR128X:$dst),
7323                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7324          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7325
7326def : Pat<(v4f32 (X86Movss
7327                   (v4f32 VR128X:$dst),
7328                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7329          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7330
// Signed -> f64.
7331def : Pat<(v2f64 (X86Movsd
7332                   (v2f64 VR128X:$dst),
7333                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7334          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7335
7336def : Pat<(v2f64 (X86Movsd
7337                   (v2f64 VR128X:$dst),
7338                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7339          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7340
7341def : Pat<(v2f64 (X86Movsd
7342                   (v2f64 VR128X:$dst),
7343                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7344          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7345
7346def : Pat<(v2f64 (X86Movsd
7347                   (v2f64 VR128X:$dst),
7348                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7349          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7350
// Unsigned -> f32.
7351def : Pat<(v4f32 (X86Movss
7352                   (v4f32 VR128X:$dst),
7353                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7354          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7355
7356def : Pat<(v4f32 (X86Movss
7357                   (v4f32 VR128X:$dst),
7358                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7359          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7360
7361def : Pat<(v4f32 (X86Movss
7362                   (v4f32 VR128X:$dst),
7363                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7364          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7365
7366def : Pat<(v4f32 (X86Movss
7367                   (v4f32 VR128X:$dst),
7368                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7369          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7370
// Unsigned -> f64.
7371def : Pat<(v2f64 (X86Movsd
7372                   (v2f64 VR128X:$dst),
7373                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7374          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7375
7376def : Pat<(v2f64 (X86Movsd
7377                   (v2f64 VR128X:$dst),
7378                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7379          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7380
7381def : Pat<(v2f64 (X86Movsd
7382                   (v2f64 VR128X:$dst),
7383                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7384          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7385
7386def : Pat<(v2f64 (X86Movsd
7387                   (v2f64 VR128X:$dst),
7388                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7389          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7390} // Predicates = [HasAVX512]
7391
7392// Convert float/double to signed/unsigned int 32/64 with truncation
//
// Parameters:
//   opc       - opcode byte shared by every form defined here.
//   asm       - mnemonic; the operand string is appended to it.
//   _SrcRC    - type info of the FP source (supplies FRC, RC, VT, memory
//               operands, load fragments and ExeDomain).
//   _DstRC    - type info of the integer destination (supplies RC).
//   OpNode    - node matched by the codegen-only rr/rm forms, which take the
//               source as a scalar FP register (FRC) or a scalar load.
//   OpNodeInt - node matched by rr_Int/rm_Int, which take the source as a
//               vector register (RC) or via ScalarIntMemFrags.
//   OpNodeSAE - node matched by the rrb_Int ({sae}, EVEX_B) form.
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   aliasStr  - suffix appended to the mnemonic for the AT&T-only aliases
//               defined at the end of the multiclass.
7393multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7394                            X86VectorVTInfo _DstRC, SDNode OpNode,
7395                            SDNode OpNodeInt, SDNode OpNodeSAE,
7396                            X86FoldableSchedWrite sched, string aliasStr>{
7397let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
  // Codegen-only forms operating on scalar FP registers / scalar loads.
7398  let isCodeGenOnly = 1 in {
7399  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7400              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7401              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7402              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7403  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7404              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7405              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7406              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7407  }
7408
  // "_Int" forms: the source is a full vector register or an
  // IntScalarMemOp memory operand.
7409  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7410            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7411           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7412           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // {sae} form: tagged with Uses = [MXCSR] only, not SIMD_EXC like the others.
7413  let Uses = [MXCSR] in
7414  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7415            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7416            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7417                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7418  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7419              (ins _SrcRC.IntScalarMemOp:$src),
7420              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7421              [(set _DstRC.RC:$dst,
7422                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7423              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7424} //HasAVX512
7425
  // AT&T-syntax aliases that accept the mnemonic with aliasStr appended and
  // map to the rr_Int / rrb_Int / rm_Int instructions above.
7426  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7427          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7428  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7429          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7430  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7431          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7432                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7433}
7434
// Instantiations of avx512_cvt_s_all for the truncating scalar FP -> integer
// conversions. The signed variants use opcode 0x2C with any_fp_to_sint; the
// unsigned variants use opcode 0x78 with any_fp_to_uint. Within each group:
//   - f32x_info sources carry XS and EVEX_CD8<32, CD8VT1>; f64x_info sources
//     carry XD and EVEX_CD8<64, CD8VT1>.
//   - i64x_info destinations add VEX_W and pass "{q}" as the alias suffix;
//     i32x_info destinations pass "{l}".
7435defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7436                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7437                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7438defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7439                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7440                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7441defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7442                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7443                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7444defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7445                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7446                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7447
7448defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7449                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7450                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7451defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7452                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7453                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7454defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7455                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7456                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7457defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7458                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7459                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7460
7461//===----------------------------------------------------------------------===//
7462// AVX-512  Convert from float to double and back
7463//===----------------------------------------------------------------------===//
7464
// Scalar FP <-> FP conversion, non-rounding-control forms.
//
// Parameters:
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - type info of the result (also the type of $src1).
//   _Src      - type info of the value being converted ($src2).
//   OpNode    - node matched by the maskable rr_Int/rm_Int forms; it takes
//               ($src1, $src2) and yields _.VT.
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//
// Defines:
//   rr_Int / rm_Int - maskable forms (via AVX512_maskable_scalar) on vector
//                     register classes / IntScalarMemOp.
//   rr / rm         - codegen-only forms on scalar FP register classes with
//                     an empty pattern list; they are selected through
//                     separate Pat definitions.
// Everything defined here is tagged Uses = [MXCSR], mayRaiseFPException = 1.
7465let Uses = [MXCSR], mayRaiseFPException = 1 in
7466multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7467                                X86VectorVTInfo _Src, SDNode OpNode,
7468                                X86FoldableSchedWrite sched> {
7469  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7470                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7471                         "$src2, $src1", "$src1, $src2",
7472                         (_.VT (OpNode (_.VT _.RC:$src1),
7473                                       (_Src.VT _Src.RC:$src2)))>,
7474                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7475  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7476                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7477                         "$src2, $src1", "$src1, $src2",
7478                         (_.VT (OpNode (_.VT _.RC:$src1),
7479                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7480                         EVEX_4V, VEX_LIG,
7481                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7482
  // Pattern-less scalar-register forms; hasSideEffects/mayLoad are spelled
  // out explicitly because there is no pattern to infer them from.
7483  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7484    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7485               (ins _.FRC:$src1, _Src.FRC:$src2),
7486               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7487               EVEX_4V, VEX_LIG, Sched<[sched]>;
7488    let mayLoad = 1 in
7489    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7490               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7491               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7492               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7493  }
7494}
7495
7496// Scalar Conversion with SAE - suppress all exceptions
//
// Defines the single maskable register form rrb_Int, printed with a "{sae}"
// operand and encoded with EVEX_B. OpNodeSAE takes ($src1 : _.VT,
// $src2 : _Src.VT) and yields _.VT. The form is tagged Uses = [MXCSR].
7497multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7498                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7499                                    X86FoldableSchedWrite sched> {
7500  let Uses = [MXCSR] in
7501  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7502                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7503                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7504                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7505                                         (_Src.VT _Src.RC:$src2)))>,
7506                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7507}
7508
7509// Scalar Conversion with rounding control (RC)
//
// Defines the single maskable register form rrb_Int, which takes an extra
// AVX512RC:$rc operand. OpNodeRnd receives ($src1, $src2, (i32 timm:$rc)).
// The form is encoded with EVEX_B and EVEX_RC and tagged Uses = [MXCSR].
7510multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7511                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7512                                   X86FoldableSchedWrite sched> {
7513  let Uses = [MXCSR] in
7514  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7515                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7516                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7517                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7518                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7519                        EVEX_4V, VEX_LIG, Sched<[sched]>,
7520                        EVEX_B, EVEX_RC;
7521}
// Wrapper that emits the "Z" records for a scalar conversion from _src to
// _dst by combining avx512_cvt_fp_scalar (plain forms, OpNode) with
// avx512_cvt_fp_rc_scalar (rounding-control form, OpNodeRnd). The records are
// predicated on HasAVX512 and tagged VEX_W, EVEX_CD8<64, CD8VT1>, XD.
// Note the argument order: callers pass (_src, _dst), while the inner
// multiclasses take (_dst, _src).
7522multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7523                                      SDNode OpNode, SDNode OpNodeRnd,
7524                                      X86FoldableSchedWrite sched,
7525                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7526  let Predicates = [HasAVX512] in {
7527    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7528             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7529                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7530  }
7531}
7532
// Wrapper that emits the "Z" records for a scalar conversion from _src to
// _dst by combining avx512_cvt_fp_scalar (plain forms, OpNode) with
// avx512_cvt_fp_sae_scalar ({sae} form, OpNodeSAE). The records are
// predicated on HasAVX512 and tagged EVEX_CD8<32, CD8VT1>, XS.
// Note the argument order: callers pass (_src, _dst), while the inner
// multiclasses take (_dst, _src).
7533multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7534                                      SDNode OpNode, SDNode OpNodeSAE,
7535                                      X86FoldableSchedWrite sched,
7536                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7537  let Predicates = [HasAVX512] in {
7538    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7539             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7540             EVEX_CD8<32, CD8VT1>, XS;
7541  }
7542}
// Scalar double <-> single conversions, both on opcode 0x5A.
//   VCVTSD2SS: f64x_info -> f32x_info, with a rounding-control variant
//              (X86froundsRnd).
//   VCVTSS2SD: f32x_info -> f64x_info, with an SAE variant (X86fpextsSAE).
7543defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7544                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7545                                         f32x_info>;
7546defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7547                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7548                                          f64x_info>;
7549
// Selection patterns for the scalar VCVTSS2SD / VCVTSD2SS instructions.
//
// Scalar-register forms: the pass-through operand ($src1) is IMPLICIT_DEF.
// The load-folding fpextend pattern additionally requires OptForSize; there
// is no load-folding fpround pattern in this group.
7550def : Pat<(f64 (any_fpextend FR32X:$src)),
7551          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7552          Requires<[HasAVX512]>;
7553def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7554          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7555          Requires<[HasAVX512, OptForSize]>;
7556
7557def : Pat<(f32 (any_fpround FR64X:$src)),
7558          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7559           Requires<[HasAVX512]>;
7560
// Vector-register forms: a convert of element 0 of $src, re-inserted into
// $dst through X86Movss/X86Movsd, selects the "_Int" instruction with $dst
// as the first operand.
7561def : Pat<(v4f32 (X86Movss
7562                   (v4f32 VR128X:$dst),
7563                   (v4f32 (scalar_to_vector
7564                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7565          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7566          Requires<[HasAVX512]>;
7567
7568def : Pat<(v2f64 (X86Movsd
7569                   (v2f64 VR128X:$dst),
7570                   (v2f64 (scalar_to_vector
7571                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7572          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7573          Requires<[HasAVX512]>;
7574
7575//===----------------------------------------------------------------------===//
7576// AVX-512  Vector convert from signed/unsigned integer to float/double
7577//          and from float/double to signed/unsigned integer
7578//===----------------------------------------------------------------------===//
7579
// Generic packed conversion: defines the register (rr), full-vector memory
// (rm) and broadcast memory (rmb) forms, each through AVX512_maskable_cvt
// with an unmasked, a merge-masked and a zero-masked pattern.
//
// Parameters:
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic.
//   _          - type info of the destination vector.
//   _Src       - type info of the source vector.
//   OpNode     - node used in the unmasked rr and rmb patterns.
//   MaskOpNode - node used inside the vselect_mask patterns of rr and rmb.
//   sched      - scheduling class; rm/rmb use sched.Folded.
//   Broadcast  - string appended to "${src}" in the rmb assembly syntax;
//                defaults to _.BroadcastStr.
//   Alias      - suffix appended to OpcodeStr for the rm form only.
//   MemOp      - memory operand of the rm form; defaults to _Src.MemOp.
//   MaskRC     - write-mask register class; defaults to _.KRCWM.
//   LdDAG      - unmasked pattern of the rm form; defaults to OpNode applied
//                to _Src.LdFrag.
//   MaskLdDAG  - pattern placed inside vselect_mask for the masked rm forms;
//                defaults to MaskOpNode applied to _Src.LdFrag.
//
// All three forms are defined under Uses = [MXCSR], mayRaiseFPException = 1.
7580multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7581                          X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode,
7582                          X86FoldableSchedWrite sched,
7583                          string Broadcast = _.BroadcastStr,
7584                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7585                          RegisterClass MaskRC = _.KRCWM,
7586                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7587                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7588let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
7589  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7590                         (ins _Src.RC:$src),
7591                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7592                         (ins MaskRC:$mask, _Src.RC:$src),
7593                          OpcodeStr, "$src", "$src",
7594                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7595                         (vselect_mask MaskRC:$mask,
7596                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7597                                       _.RC:$src0),
7598                         (vselect_mask MaskRC:$mask,
7599                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7600                                       _.ImmAllZerosV)>,
7601                         EVEX, Sched<[sched]>;
7602
  // Memory source; patterns come from LdDAG / MaskLdDAG and the mnemonic
  // carries the Alias suffix.
7603  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7604                         (ins MemOp:$src),
7605                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7606                         (ins MaskRC:$mask, MemOp:$src),
7607                         OpcodeStr#Alias, "$src", "$src",
7608                         LdDAG,
7609                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7610                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7611                         EVEX, Sched<[sched.Folded]>;
7612
  // Broadcast memory source (EVEX_B): scalar memory operand matched through
  // _Src.BroadcastLdFrag.
7613  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7614                         (ins _Src.ScalarMemOp:$src),
7615                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7616                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7617                         OpcodeStr,
7618                         "${src}"#Broadcast, "${src}"#Broadcast,
7619                         (_.VT (OpNode (_Src.VT
7620                                  (_Src.BroadcastLdFrag addr:$src))
7621                            )),
7622                         (vselect_mask MaskRC:$mask,
7623                                       (_.VT
7624                                        (MaskOpNode
7625                                         (_Src.VT
7626                                          (_Src.BroadcastLdFrag addr:$src)))),
7627                                       _.RC:$src0),
7628                         (vselect_mask MaskRC:$mask,
7629                                       (_.VT
7630                                        (MaskOpNode
7631                                         (_Src.VT
7632                                          (_Src.BroadcastLdFrag addr:$src)))),
7633                                       _.ImmAllZerosV)>,
7634                         EVEX, EVEX_B, Sched<[sched.Folded]>;
7635  }
7636}
7637// Conversion with SAE - suppress all exceptions
//
// Defines the single maskable register form rrb, printed with a "{sae}"
// operand and encoded with EVEX_B. OpNodeSAE is applied to the _Src.VT
// source and yields _.VT. The form is tagged Uses = [MXCSR].
7638multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7639                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
7640                              X86FoldableSchedWrite sched> {
7641  let Uses = [MXCSR] in
7642  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7643                        (ins _Src.RC:$src), OpcodeStr,
7644                        "{sae}, $src", "$src, {sae}",
7645                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7646                        EVEX, EVEX_B, Sched<[sched]>;
7647}
7648
7649// Conversion with rounding control (RC)
//
// Defines the single maskable register form rrb, which takes an extra
// AVX512RC:$rc operand. OpNodeRnd receives (source, (i32 timm:$rc)). The
// form is encoded with EVEX_B and EVEX_RC and tagged Uses = [MXCSR].
7650multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7651                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
7652                         X86FoldableSchedWrite sched> {
7653  let Uses = [MXCSR] in
7654  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7655                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7656                        "$rc, $src", "$src, $rc",
7657                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7658                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7659}
7660
7661// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
//
// Forwards every argument to avx512_vcvt_fp and overrides both LdDAG and
// MaskLdDAG with the same fragment, looked up by name as
// "extload" # _Src.VTName and applied to addr:$src. OpNode/MaskOpNode
// therefore do not appear in the memory-form patterns passed down from here.
7662multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7663                                X86VectorVTInfo _Src, SDNode OpNode,
7664                                SDNode MaskOpNode,
7665                                X86FoldableSchedWrite sched,
7666                                string Broadcast = _.BroadcastStr,
7667                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7668                                RegisterClass MaskRC = _.KRCWM>
7669  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7670                   Alias, MemOp, MaskRC,
7671                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7672                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7673
7674// Extend Float to Double
//
// Z    (HasAVX512): v8f32x_info -> v8f64_info, plus an SAE form (X86vfpextSAE).
// Z128 (HasVLX):    v4f32x_info -> v2f64x_info using X86any_vfpext/X86vfpext,
//                   with an explicit "{1to2}" broadcast string and f64mem as
//                   the memory operand.
// Z256 (HasVLX):    v4f32x_info -> v4f64x_info.
7675multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7676                           X86SchedWriteWidths sched> {
7677  let Predicates = [HasAVX512] in {
7678    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7679                            any_fpextend, fpextend, sched.ZMM>,
7680             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7681                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7682  }
7683  let Predicates = [HasVLX] in {
7684    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7685                               X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
7686                               "", f64mem>, EVEX_V128;
7687    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
7688                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7689  }
7690}
7691
7692// Truncate Double to Float
//
// Z    (HasAVX512): v8f64_info -> v8f32x_info, plus a rounding-control form
//                   (X86vfproundRnd).
// Z128 (HasVLX):    v2f64x_info -> v4f32x_info. Both pattern nodes are
//                   null_frag, so this instantiation carries no selection
//                   patterns of its own. It uses the "{x}" suffix on the
//                   memory form, f128mem and VK2WM.
// Z256 (HasVLX):    v4f64x_info -> v4f32x_info, "{y}" suffix on the memory
//                   form.
//
// The trailing InstAlias records are AT&T-only ("att") and accept the
// mnemonic with an explicit "x" (Z128) or "y" (Z256) suffix for the register
// (rr/rrk/rrkz) and broadcast (rmb/rmbk/rmbkz) forms.
7693multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7694  let Predicates = [HasAVX512] in {
7695    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
7696                            X86any_vfpround, X86vfpround, sched.ZMM>,
7697             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7698                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
7699  }
7700  let Predicates = [HasVLX] in {
7701    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7702                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
7703                               f128mem, VK2WM>, EVEX_V128;
7704    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
7705                               X86any_vfpround, X86vfpround,
7706                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7707  }
7708
  // "x"-suffixed aliases for the 128-bit source forms.
7709  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7710                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7711  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7712                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7713                  VK2WM:$mask, VR128X:$src), 0, "att">;
7714  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7715                  "$dst {${mask}} {z}, $src}",
7716                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7717                  VK2WM:$mask, VR128X:$src), 0, "att">;
7718  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7719                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7720  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7721                  "$dst {${mask}}, ${src}{1to2}}",
7722                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7723                  VK2WM:$mask, f64mem:$src), 0, "att">;
7724  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7725                  "$dst {${mask}} {z}, ${src}{1to2}}",
7726                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7727                  VK2WM:$mask, f64mem:$src), 0, "att">;
7728
  // "y"-suffixed aliases for the 256-bit source forms.
7729  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7730                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7731  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7732                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7733                  VK4WM:$mask, VR256X:$src), 0, "att">;
7734  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7735                  "$dst {${mask}} {z}, $src}",
7736                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7737                  VK4WM:$mask, VR256X:$src), 0, "att">;
7738  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7739                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7740  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7741                  "$dst {${mask}}, ${src}{1to4}}",
7742                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7743                  VK4WM:$mask, f64mem:$src), 0, "att">;
7744  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7745                  "$dst {${mask}} {z}, ${src}{1to4}}",
7746                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7747                  VK4WM:$mask, f64mem:$src), 0, "att">;
7748}
7749
// Packed double <-> single conversions, both on opcode 0x5A.
//   VCVTPD2PS: VEX_W, PD prefix, EVEX_CD8<64, CD8VF>.
//   VCVTPS2PD: PS prefix, EVEX_CD8<32, CD8VH>.
7750defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7751                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
7752defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7753                                  PS, EVEX_CD8<32, CD8VH>;
7754
// Hand-written selection patterns for VCVTPD2PSZ128 (v2f64 source), covering
// three source kinds: register (rr*), full vector load (rm*) and 64-bit
// broadcast load (rmb*). For each source kind there are three patterns:
//   - unmasked:     X86any_vfpround(src)
//   - merge-masked: X86vmfpround(src, $src0, $mask)       -> *k
//   - zero-masked:  X86vmfpround(src, all-zeros, $mask)   -> *kz
7755let Predicates = [HasVLX] in {
7756  // Special patterns to allow use of X86vmfpround for masking. Instruction
7757  // patterns have been disabled with null_frag.
7758  def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
7759            (VCVTPD2PSZ128rr VR128X:$src)>;
7760  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7761                          VK2WM:$mask),
7762            (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7763  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7764                          VK2WM:$mask),
7765            (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7766
7767  def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
7768            (VCVTPD2PSZ128rm addr:$src)>;
7769  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7770                          VK2WM:$mask),
7771            (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7772  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7773                          VK2WM:$mask),
7774            (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7775
7776  def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7777            (VCVTPD2PSZ128rmb addr:$src)>;
7778  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7779                          (v4f32 VR128X:$src0), VK2WM:$mask),
7780            (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7781  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7782                          v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7783            (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7784}
7785
7786// Convert Signed/Unsigned Doubleword to Double
//
// The enclosing let sets Uses to the empty register list and
// mayRaiseFPException to 0 for everything defined by this multiclass.
// NOTE(review): presumably this is meant to override the
// Uses = [MXCSR] / mayRaiseFPException = 1 defaults applied inside
// avx512_vcvt_fp - confirm the let precedence.
//
// Z    (HasAVX512): v8i32x_info -> v8f64_info.
// Z128 (HasVLX):    v4i32x_info -> v2f64x_info using OpNode128/MaskOpNode128,
//                   with i64mem as the memory operand. Its load patterns
//                   match a 64-bit scalar load placed into a v2i64 via
//                   scalar_to_vector and bitcast to v4i32.
// Z256 (HasVLX):    v4i32x_info -> v4f64x_info.
7787let Uses = []<Register>, mayRaiseFPException = 0 in
7788multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7789                           SDNode MaskOpNode, SDNode OpNode128,
7790                           SDNode MaskOpNode128,
7791                           X86SchedWriteWidths sched> {
7792  // No rounding in this op
7793  let Predicates = [HasAVX512] in
7794    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7795                            MaskOpNode, sched.ZMM>, EVEX_V512;
7796
7797  let Predicates = [HasVLX] in {
7798    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7799                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
7800                               "", i64mem, VK2WM,
7801                               (v2f64 (OpNode128 (bc_v4i32
7802                                (v2i64
7803                                 (scalar_to_vector (loadi64 addr:$src)))))),
7804                               (v2f64 (MaskOpNode128 (bc_v4i32
7805                                (v2i64
7806                                 (scalar_to_vector (loadi64 addr:$src))))))>,
7807                               EVEX_V128;
7808    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7809                               MaskOpNode, sched.YMM>, EVEX_V256;
7810  }
7811}
7812
7813// Convert Signed/Unsigned Doubleword to Float
//
// Z    (HasAVX512): v16i32_info -> v16f32_info, plus a rounding-control form
//                   (OpNodeRnd).
// Z128 (HasVLX):    v4i32x_info -> v4f32x_info.
// Z256 (HasVLX):    v8i32x_info -> v8f32x_info.
7814multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7815                           SDNode MaskOpNode, SDNode OpNodeRnd,
7816                           X86SchedWriteWidths sched> {
7817  let Predicates = [HasAVX512] in
7818    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7819                            MaskOpNode, sched.ZMM>,
7820             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7821                               OpNodeRnd, sched.ZMM>, EVEX_V512;
7822
7823  let Predicates = [HasVLX] in {
7824    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7825                               MaskOpNode, sched.XMM>, EVEX_V128;
7826    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7827                               MaskOpNode, sched.YMM>, EVEX_V256;
7828  }
7829}
7830
7831// Convert Float to Signed/Unsigned Doubleword with truncation
//
// Z    (HasAVX512): v16f32_info -> v16i32_info, plus an SAE form (OpNodeSAE).
// Z128 (HasVLX):    v4f32x_info -> v4i32x_info.
// Z256 (HasVLX):    v8f32x_info -> v8i32x_info.
7832multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7833                            SDNode MaskOpNode,
7834                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7835  let Predicates = [HasAVX512] in {
7836    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7837                            MaskOpNode, sched.ZMM>,
7838             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7839                                OpNodeSAE, sched.ZMM>, EVEX_V512;
7840  }
7841  let Predicates = [HasVLX] in {
7842    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7843                               MaskOpNode, sched.XMM>, EVEX_V128;
7844    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7845                               MaskOpNode, sched.YMM>, EVEX_V256;
7846  }
7847}
7848
7849// Convert Float to Signed/Unsigned Doubleword
//
// Z    (HasAVX512): v16f32_info -> v16i32_info, plus a rounding-control form
//                   (OpNodeRnd).
// Z128 (HasVLX):    v4f32x_info -> v4i32x_info.
// Z256 (HasVLX):    v8f32x_info -> v8i32x_info.
7850multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7851                           SDNode MaskOpNode, SDNode OpNodeRnd,
7852                           X86SchedWriteWidths sched> {
7853  let Predicates = [HasAVX512] in {
7854    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7855                            MaskOpNode, sched.ZMM>,
7856             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7857                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7858  }
7859  let Predicates = [HasVLX] in {
7860    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7861                               MaskOpNode, sched.XMM>, EVEX_V128;
7862    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7863                               MaskOpNode, sched.YMM>, EVEX_V256;
7864  }
7865}
7866
7867// Convert Double to Signed/Unsigned Doubleword with truncation
//
// Z    (HasAVX512): v8f64_info -> v8i32x_info, plus an SAE form (OpNodeSAE).
// Z128 (HasVLX):    v2f64x_info -> v4i32x_info. Both pattern nodes are
//                   null_frag, so this instantiation carries no selection
//                   patterns of its own. It uses the "{x}" suffix on the
//                   memory form, f128mem and VK2WM.
// Z256 (HasVLX):    v4f64x_info -> v4i32x_info, "{y}" suffix on the memory
//                   form.
//
// The trailing InstAlias records are AT&T-only ("att") and accept the
// mnemonic with an explicit "x" (Z128) or "y" (Z256) suffix for the register
// (rr/rrk/rrkz) and broadcast (rmb/rmbk/rmbkz) forms.
7868multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7869                            SDNode MaskOpNode, SDNode OpNodeSAE,
7870                            X86SchedWriteWidths sched> {
7871  let Predicates = [HasAVX512] in {
7872    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7873                            MaskOpNode, sched.ZMM>,
7874             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7875                                OpNodeSAE, sched.ZMM>, EVEX_V512;
7876  }
7877  let Predicates = [HasVLX] in {
7878    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7879    // memory forms of these instructions in Asm Parser. They have the same
7880    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7881    // due to the same reason.
7882    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7883                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7884                               VK2WM>, EVEX_V128;
7885    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7886                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7887  }
7888
  // "x"-suffixed aliases for the 128-bit source forms.
7889  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7890                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7891                  VR128X:$src), 0, "att">;
7892  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7893                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7894                  VK2WM:$mask, VR128X:$src), 0, "att">;
7895  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7896                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7897                  VK2WM:$mask, VR128X:$src), 0, "att">;
7898  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7899                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7900                  f64mem:$src), 0, "att">;
7901  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7902                  "$dst {${mask}}, ${src}{1to2}}",
7903                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7904                  VK2WM:$mask, f64mem:$src), 0, "att">;
7905  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7906                  "$dst {${mask}} {z}, ${src}{1to2}}",
7907                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7908                  VK2WM:$mask, f64mem:$src), 0, "att">;
7909
  // "y"-suffixed aliases for the 256-bit source forms.
7910  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7911                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7912                  VR256X:$src), 0, "att">;
7913  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7914                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7915                  VK4WM:$mask, VR256X:$src), 0, "att">;
7916  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7917                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7918                  VK4WM:$mask, VR256X:$src), 0, "att">;
7919  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7920                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7921                  f64mem:$src), 0, "att">;
7922  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7923                  "$dst {${mask}}, ${src}{1to4}}",
7924                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7925                  VK4WM:$mask, f64mem:$src), 0, "att">;
7926  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7927                  "$dst {${mask}} {z}, ${src}{1to4}}",
7928                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7929                  VK4WM:$mask, f64mem:$src), 0, "att">;
7930}
7931
7932// Convert Double to Signed/Unsigned Doubleword
// Instantiates avx512_vcvt_fp for the 512-bit (Z), 128-bit (Z128) and 256-bit
// (Z256) encodings, then adds "att"-variant aliases that spell the Z128/Z256
// mnemonics with an explicit "x"/"y" suffix.
7933multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7934                           SDNode MaskOpNode, SDNode OpNodeRnd,
7935                           X86SchedWriteWidths sched> {
  // 512-bit form (v8f64 source, v8i32x result). avx512_vcvt_fp_rc adds the
  // variants built on OpNodeRnd.
7936  let Predicates = [HasAVX512] in {
7937    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7938                            MaskOpNode, sched.ZMM>,
7939             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7940                               OpNodeRnd, sched.ZMM>, EVEX_V512;
7941  }
7942  let Predicates = [HasVLX] in {
7943    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7944    // memory forms of these instructions in Asm Parser. They have the same
7945    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7946    // due to the same reason.
    // Z128 passes null_frag for both OpNode and MaskOpNode, which disables its
    // instruction patterns; the Z128 selection patterns are written out
    // separately in the "Special patterns" blocks later in this file.
7947    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7948                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7949                               VK2WM>, EVEX_V128;
7950    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7951                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7952  }
7953
  // "x"-suffixed spellings of the Z128 register (rr/rrk/rrkz) and {1to2}
  // broadcast (rmb/rmbk/rmbkz) forms, registered for the "att" variant only.
7954  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7955                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7956  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7957                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7958                  VK2WM:$mask, VR128X:$src), 0, "att">;
7959  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7960                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7961                  VK2WM:$mask, VR128X:$src), 0, "att">;
7962  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7963                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7964                  f64mem:$src), 0, "att">;
7965  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7966                  "$dst {${mask}}, ${src}{1to2}}",
7967                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7968                  VK2WM:$mask, f64mem:$src), 0, "att">;
7969  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7970                  "$dst {${mask}} {z}, ${src}{1to2}}",
7971                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7972                  VK2WM:$mask, f64mem:$src), 0, "att">;
7973
  // "y"-suffixed spellings of the Z256 register and {1to4} broadcast forms.
  // The destination is still VR128X; only the source is VR256X.
7974  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7975                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7976  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7977                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7978                  VK4WM:$mask, VR256X:$src), 0, "att">;
7979  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7980                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7981                  VK4WM:$mask, VR256X:$src), 0, "att">;
7982  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7983                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7984                  f64mem:$src), 0, "att">;
7985  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7986                  "$dst {${mask}}, ${src}{1to4}}",
7987                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7988                  VK4WM:$mask, f64mem:$src), 0, "att">;
7989  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7990                  "$dst {${mask}} {z}, ${src}{1to4}}",
7991                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7992                  VK4WM:$mask, f64mem:$src), 0, "att">;
7993}
7994
7995// Convert Double to Signed/Unsigned Quadword
// Instantiates avx512_vcvt_fp for the Z, Z128 and Z256 forms. Source and
// result have the same element count (v8f64->v8i64, v2f64->v2i64,
// v4f64->v4i64).
7996multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7997                           SDNode MaskOpNode, SDNode OpNodeRnd,
7998                           X86SchedWriteWidths sched> {
  // 512-bit form, gated on HasDQI; avx512_vcvt_fp_rc adds the variants built
  // on OpNodeRnd.
7999  let Predicates = [HasDQI] in {
8000    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8001                            MaskOpNode, sched.ZMM>,
8002             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8003                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8004  }
  // 128/256-bit forms additionally require HasVLX.
8005  let Predicates = [HasDQI, HasVLX] in {
8006    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8007                               MaskOpNode, sched.XMM>, EVEX_V128;
8008    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8009                               MaskOpNode, sched.YMM>, EVEX_V256;
8010  }
8011}
8012
8013// Convert Double to Signed/Unsigned Quadword with truncation
// Same shape as avx512_cvtpd2qq, except that the 512-bit form pairs
// avx512_vcvt_fp with avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
8014multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8015                            SDNode MaskOpNode, SDNode OpNodeRnd,
8016                            X86SchedWriteWidths sched> {
  // 512-bit form, gated on HasDQI; OpNodeRnd is consumed by the _sae variant.
8017  let Predicates = [HasDQI] in {
8018    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8019                            MaskOpNode, sched.ZMM>,
8020             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8021                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8022  }
  // 128/256-bit forms additionally require HasVLX.
8023  let Predicates = [HasDQI, HasVLX] in {
8024    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8025                               MaskOpNode, sched.XMM>, EVEX_V128;
8026    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8027                               MaskOpNode, sched.YMM>, EVEX_V256;
8028  }
8029}
8030
8031// Convert Signed/Unsigned Quadword to Double
// Instantiates avx512_vcvt_fp for the Z, Z128 and Z256 forms with an i64
// vector source and an f64 vector result of the same element count.
8032multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8033                           SDNode MaskOpNode, SDNode OpNodeRnd,
8034                           X86SchedWriteWidths sched> {
  // 512-bit form, gated on HasDQI; avx512_vcvt_fp_rc adds the variants built
  // on OpNodeRnd.
8035  let Predicates = [HasDQI] in {
8036    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8037                            MaskOpNode, sched.ZMM>,
8038             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8039                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8040  }
  // 128/256-bit forms additionally require HasVLX and are tagged
  // NotEVEX2VEXConvertible.
8041  let Predicates = [HasDQI, HasVLX] in {
8042    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8043                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8044    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8045                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8046  }
8047}
8048
8049// Convert Float to Signed/Unsigned Quadword
// Instantiates avx512_vcvt_fp for the Z, Z128 and Z256 forms with an f32
// vector source and an i64 vector result. The Z128 form needs explicit
// memory operands and load patterns because it reads only half of its
// v4f32 source.
8050multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8051                           SDNode MaskOpNode, SDNode OpNodeRnd,
8052                           X86SchedWriteWidths sched> {
  // 512-bit form, gated on HasDQI; avx512_vcvt_fp_rc adds the variants built
  // on OpNodeRnd.
8053  let Predicates = [HasDQI] in {
8054    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8055                            MaskOpNode, sched.ZMM>,
8056             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8057                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8058  }
8059  let Predicates = [HasDQI, HasVLX] in {
8060    // Explicitly specified broadcast string, since we take only 2 elements
8061    // from v4f32x_info source
    // The memory operand is f64mem and the mask class VK2WM. The two trailing
    // dags are the load patterns for OpNode and MaskOpNode: a loadf64 wrapped
    // by scalar_to_vector into v2f64 and bitcast to v4f32.
8062    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8063                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8064                               (v2i64 (OpNode (bc_v4f32
8065                                (v2f64
8066                                 (scalar_to_vector (loadf64 addr:$src)))))),
8067                               (v2i64 (MaskOpNode (bc_v4f32
8068                                (v2f64
8069                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8070                               EVEX_V128;
8071    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8072                               MaskOpNode, sched.YMM>, EVEX_V256;
8073  }
8074}
8075
8076// Convert Float to Signed/Unsigned Quadword with truncation
// Same shape as avx512_cvtps2qq, except that the 512-bit form pairs
// avx512_vcvt_fp with avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
8077multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8078                            SDNode MaskOpNode, SDNode OpNodeRnd,
8079                            X86SchedWriteWidths sched> {
  // 512-bit form, gated on HasDQI; OpNodeRnd is consumed by the _sae variant.
8080  let Predicates = [HasDQI] in {
8081    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8082                            MaskOpNode, sched.ZMM>,
8083             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8084                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8085  }
8086  let Predicates = [HasDQI, HasVLX] in {
8087    // Explicitly specified broadcast string, since we take only 2 elements
8088    // from v4f32x_info source
    // The memory operand is f64mem and the mask class VK2WM. The two trailing
    // dags are the load patterns for OpNode and MaskOpNode: a loadf64 wrapped
    // by scalar_to_vector into v2f64 and bitcast to v4f32.
8089    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8090                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8091                               (v2i64 (OpNode (bc_v4f32
8092                                (v2f64
8093                                 (scalar_to_vector (loadf64 addr:$src)))))),
8094                               (v2i64 (MaskOpNode (bc_v4f32
8095                                (v2f64
8096                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8097                               EVEX_V128;
8098    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8099                               MaskOpNode, sched.YMM>, EVEX_V256;
8100  }
8101}
8102
8103// Convert Signed/Unsigned Quadword to Float
// Instantiates avx512_vcvt_fp for the Z, Z128 and Z256 forms, then adds
// "att"-variant aliases that spell the Z128/Z256 mnemonics with an explicit
// "x"/"y" suffix.
8104multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8105                           SDNode MaskOpNode, SDNode OpNodeRnd,
8106                           X86SchedWriteWidths sched> {
  // 512-bit form (v8i64 source, v8f32x result), gated on HasDQI;
  // avx512_vcvt_fp_rc adds the variants built on OpNodeRnd.
8107  let Predicates = [HasDQI] in {
8108    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8109                            MaskOpNode, sched.ZMM>,
8110             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8111                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8112  }
8113  let Predicates = [HasDQI, HasVLX] in {
8114    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8115    // memory forms of these instructions in Asm Parser. They have the same
8116    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8117    // due to the same reason.
    // Z128 passes null_frag for both OpNode and MaskOpNode, which disables its
    // instruction patterns; the Z128 selection patterns are written out
    // separately in the "Special patterns" blocks later in this file.
8118    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8119                               null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8120                               EVEX_V128, NotEVEX2VEXConvertible;
8121    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8122                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8123                               NotEVEX2VEXConvertible;
8124  }
8125
  // "x"-suffixed spellings of the Z128 register (rr/rrk/rrkz) and {1to2}
  // broadcast (rmb/rmbk/rmbkz) forms, registered for the "att" variant only.
8126  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8127                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8128                  VR128X:$src), 0, "att">;
8129  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8130                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8131                  VK2WM:$mask, VR128X:$src), 0, "att">;
8132  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8133                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8134                  VK2WM:$mask, VR128X:$src), 0, "att">;
8135  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8136                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8137                  i64mem:$src), 0, "att">;
8138  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8139                  "$dst {${mask}}, ${src}{1to2}}",
8140                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8141                  VK2WM:$mask, i64mem:$src), 0, "att">;
8142  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8143                  "$dst {${mask}} {z}, ${src}{1to2}}",
8144                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8145                  VK2WM:$mask, i64mem:$src), 0, "att">;
8146
  // "y"-suffixed spellings of the Z256 register and {1to4} broadcast forms.
  // The destination is still VR128X; only the source is VR256X.
8147  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8148                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8149                  VR256X:$src), 0, "att">;
8150  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8151                  "$dst {${mask}}, $src}",
8152                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8153                  VK4WM:$mask, VR256X:$src), 0, "att">;
8154  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8155                  "$dst {${mask}} {z}, $src}",
8156                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8157                  VK4WM:$mask, VR256X:$src), 0, "att">;
8158  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8159                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8160                  i64mem:$src), 0, "att">;
8161  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8162                  "$dst {${mask}}, ${src}{1to4}}",
8163                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8164                  VK4WM:$mask, i64mem:$src), 0, "att">;
8165  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8166                  "$dst {${mask}} {z}, ${src}{1to4}}",
8167                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8168                  VK4WM:$mask, i64mem:$src), 0, "att">;
8169}
8170
// Instantiations of the packed integer<->floating-point conversion
// multiclasses. Each defm supplies the opcode byte, the mnemonic, the
// selection nodes (plain, masked and rounding/SAE where the multiclass takes
// them) and the scheduling class set. The trailing classes (PS/PD/XS/XD,
// VEX_W, EVEX_CD8<...>) are defined elsewhere; presumably they select the
// opcode prefix and encoding attributes -- confirm against their definitions.
8171defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8172                                 X86any_VSintToFP, X86VSintToFP,
8173                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8174
8175defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8176                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8177                                PS, EVEX_CD8<32, CD8VF>;
8178
8179defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8180                                 X86cvttp2si, X86cvttp2siSAE,
8181                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8182
8183defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8184                                 X86cvttp2si, X86cvttp2siSAE,
8185                                 SchedWriteCvtPD2DQ>,
8186                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8187
8188defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8189                                 X86cvttp2ui, X86cvttp2uiSAE,
8190                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8191
8192defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8193                                 X86cvttp2ui, X86cvttp2uiSAE,
8194                                 SchedWriteCvtPD2DQ>,
8195                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8196
8197defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8198                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8199                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8200
8201defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8202                                 uint_to_fp, X86VUintToFpRnd,
8203                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8204
8205defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8206                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8207                                 EVEX_CD8<32, CD8VF>;
8208
8209defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8210                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8211                                 VEX_W, EVEX_CD8<64, CD8VF>;
8212
8213defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8214                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8215                                 PS, EVEX_CD8<32, CD8VF>;
8216
8217defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8218                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8219                                 PS, EVEX_CD8<64, CD8VF>;
8220
8221defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8222                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8223                                 PD, EVEX_CD8<64, CD8VF>;
8224
8225defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8226                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8227                                 EVEX_CD8<32, CD8VH>;
8228
8229defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8230                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8231                                 PD, EVEX_CD8<64, CD8VF>;
8232
8233defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8234                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8235                                 EVEX_CD8<32, CD8VH>;
8236
8237defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8238                                 X86cvttp2si, X86cvttp2siSAE,
8239                                 SchedWriteCvtPD2DQ>, VEX_W,
8240                                 PD, EVEX_CD8<64, CD8VF>;
8241
8242defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8243                                 X86cvttp2si, X86cvttp2siSAE,
8244                                 SchedWriteCvtPS2DQ>, PD,
8245                                 EVEX_CD8<32, CD8VH>;
8246
8247defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8248                                 X86cvttp2ui, X86cvttp2uiSAE,
8249                                 SchedWriteCvtPD2DQ>, VEX_W,
8250                                 PD, EVEX_CD8<64, CD8VF>;
8251
8252defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8253                                 X86cvttp2ui, X86cvttp2uiSAE,
8254                                 SchedWriteCvtPS2DQ>, PD,
8255                                 EVEX_CD8<32, CD8VH>;
8256
8257defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8258                            sint_to_fp, X86VSintToFpRnd,
8259                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8260
8261defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8262                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8263                            VEX_W, XS, EVEX_CD8<64, CD8VF>;
8264
8265defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8266                            sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8267                            VEX_W, PS, EVEX_CD8<64, CD8VF>;
8268
8269defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8270                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
8271                            VEX_W, XD, EVEX_CD8<64, CD8VF>;
8272
// Selection patterns for the 128-bit double->doubleword conversions
// (VCVT[T]PD2[U]DQZ128*). Each group covers three source kinds -- register,
// full v2f64 load (loadv2f64) and 64-bit broadcast load (X86VBroadcastld64)
// -- and for each an unmasked form, a merge-masked form (k, passthrough in
// $src0) and a zero-masked form (kz, passthrough ImmAllZerosV).
8273let Predicates = [HasVLX] in {
8274  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8275  // patterns have been disabled with null_frag.
8276  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8277            (VCVTPD2DQZ128rr VR128X:$src)>;
8278  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8279                          VK2WM:$mask),
8280            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8281  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8282                          VK2WM:$mask),
8283            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8284
8285  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8286            (VCVTPD2DQZ128rm addr:$src)>;
8287  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8288                          VK2WM:$mask),
8289            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8290  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8291                          VK2WM:$mask),
8292            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8293
8294  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8295            (VCVTPD2DQZ128rmb addr:$src)>;
8296  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8297                          (v4i32 VR128X:$src0), VK2WM:$mask),
8298            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8299  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8300                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8301            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8302
8303  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8304  // patterns have been disabled with null_frag.
8305  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8306            (VCVTTPD2DQZ128rr VR128X:$src)>;
8307  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8308                          VK2WM:$mask),
8309            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8310  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8311                          VK2WM:$mask),
8312            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8313
8314  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8315            (VCVTTPD2DQZ128rm addr:$src)>;
8316  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8317                          VK2WM:$mask),
8318            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8319  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8320                          VK2WM:$mask),
8321            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8322
8323  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8324            (VCVTTPD2DQZ128rmb addr:$src)>;
8325  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8326                          (v4i32 VR128X:$src0), VK2WM:$mask),
8327            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8328  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8329                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8330            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8331
8332  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8333  // patterns have been disabled with null_frag.
8334  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8335            (VCVTPD2UDQZ128rr VR128X:$src)>;
8336  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8337                           VK2WM:$mask),
8338            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8339  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8340                           VK2WM:$mask),
8341            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8342
8343  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8344            (VCVTPD2UDQZ128rm addr:$src)>;
8345  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8346                           VK2WM:$mask),
8347            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8348  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8349                           VK2WM:$mask),
8350            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8351
8352  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8353            (VCVTPD2UDQZ128rmb addr:$src)>;
8354  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8355                           (v4i32 VR128X:$src0), VK2WM:$mask),
8356            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8357  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8358                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8359            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8360
8361  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8362  // patterns have been disabled with null_frag.
8363  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8364            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8365  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8366                          VK2WM:$mask),
8367            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8368  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8369                          VK2WM:$mask),
8370            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8371
8372  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8373            (VCVTTPD2UDQZ128rm addr:$src)>;
8374  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8375                          VK2WM:$mask),
8376            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8377  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8378                          VK2WM:$mask),
8379            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8380
8381  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8382            (VCVTTPD2UDQZ128rmb addr:$src)>;
8383  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8384                          (v4i32 VR128X:$src0), VK2WM:$mask),
8385            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8386  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8387                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8388            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8389}
8390
// Additional memory-form patterns for the 128-bit float->quadword
// conversions (VCVT[T]PS2[U]QQZ128rm/rmk/rmkz). They match a source written
// as an X86vzload64 typed v2f64 and bitcast to v4f32. The masked forms are
// expressed as vselect_mask with either $src0 (rmk) or ImmAllZerosV (rmkz)
// as the false operand.
8391let Predicates = [HasDQI, HasVLX] in {
  // Signed, non-truncating (X86cvtp2Int).
8392  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8393            (VCVTPS2QQZ128rm addr:$src)>;
8394  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8395                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8396                                 VR128X:$src0)),
8397            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8398  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8399                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8400                                 v2i64x_info.ImmAllZerosV)),
8401            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8402
  // Unsigned, non-truncating (X86cvtp2UInt).
8403  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8404            (VCVTPS2UQQZ128rm addr:$src)>;
8405  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8406                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8407                                 VR128X:$src0)),
8408            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8409  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8410                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8411                                 v2i64x_info.ImmAllZerosV)),
8412            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8413
  // Signed, truncating. The unmasked pattern uses X86any_cvttp2si; the masked
  // ones use X86cvttp2si.
8414  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8415            (VCVTTPS2QQZ128rm addr:$src)>;
8416  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8417                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8418                                 VR128X:$src0)),
8419            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8420  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8421                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8422                                 v2i64x_info.ImmAllZerosV)),
8423            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8424
  // Unsigned, truncating. The unmasked pattern uses X86any_cvttp2ui; the
  // masked ones use X86cvttp2ui.
8425  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8426            (VCVTTPS2UQQZ128rm addr:$src)>;
8427  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8428                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8429                                 VR128X:$src0)),
8430            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8431  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8432                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8433                                 v2i64x_info.ImmAllZerosV)),
8434            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8435}
8436
// Additional memory-form patterns for the 128-bit doubleword->double
// conversions (VCVT[U]DQ2PDZ128rm/rmk/rmkz). They match a source written as
// an X86vzload64 typed v2i64 and bitcast to v4i32. The masked forms are
// expressed as vselect_mask with either $src0 (rmk) or ImmAllZerosV (rmkz)
// as the false operand.
8437let Predicates = [HasVLX] in {
  // Signed. The unmasked pattern uses X86any_VSintToFP; the masked ones use
  // X86VSintToFP.
8438  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8439            (VCVTDQ2PDZ128rm addr:$src)>;
8440  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8441                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8442                                 VR128X:$src0)),
8443            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8444  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8445                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8446                                 v2f64x_info.ImmAllZerosV)),
8447            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8448
  // Unsigned. The unmasked pattern uses X86any_VUintToFP; the masked ones use
  // X86VUintToFP.
8449  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8450            (VCVTUDQ2PDZ128rm addr:$src)>;
8451  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8452                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8453                                 VR128X:$src0)),
8454            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8455  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8456                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8457                                 v2f64x_info.ImmAllZerosV)),
8458            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8459}
8460
// Selection patterns for the 128-bit quadword->float conversions
// (VCVT[U]QQ2PSZ128*). Each group covers three source kinds -- register,
// full v2i64 load (loadv2i64) and 64-bit broadcast load (X86VBroadcastld64)
// -- and for each an unmasked form, a merge-masked form (k, passthrough in
// $src0) and a zero-masked form (kz, passthrough ImmAllZerosV).
8461let Predicates = [HasDQI, HasVLX] in {
8462  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8463  // patterns have been disabled with null_frag.
8464  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8465            (VCVTQQ2PSZ128rr VR128X:$src)>;
8466  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8467                           VK2WM:$mask),
8468            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8469  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8470                           VK2WM:$mask),
8471            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8472
8473  def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8474            (VCVTQQ2PSZ128rm addr:$src)>;
8475  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8476                           VK2WM:$mask),
8477            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8478  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8479                           VK2WM:$mask),
8480            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8481
8482  def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8483            (VCVTQQ2PSZ128rmb addr:$src)>;
8484  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8485                           (v4f32 VR128X:$src0), VK2WM:$mask),
8486            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8487  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8488                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8489            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8490
8491  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8492  // patterns have been disabled with null_frag.
8493  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8494            (VCVTUQQ2PSZ128rr VR128X:$src)>;
8495  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8496                           VK2WM:$mask),
8497            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8498  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8499                           VK2WM:$mask),
8500            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8501
8502  def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8503            (VCVTUQQ2PSZ128rm addr:$src)>;
8504  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8505                           VK2WM:$mask),
8506            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8507  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8508                           VK2WM:$mask),
8509            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8510
8511  def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8512            (VCVTUQQ2PSZ128rmb addr:$src)>;
8513  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8514                           (v4f32 VR128X:$src0), VK2WM:$mask),
8515            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8516  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8517                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8518            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8519}
8520
8521//===----------------------------------------------------------------------===//
8522// Half precision conversion instructions
8523//===----------------------------------------------------------------------===//
8524
// avx512_cvtph2ps - register (rr) and memory (rm) forms of vcvtph2ps,
// opcode 0x13.
//   _dest    - VT info of the result; supplies the output register class.
//   _src     - VT info of the source operand.
//   x86memop - memory operand used by the rm form.
//   ld_dag   - DAG fragment matched as the memory source of the rm form.
//   sched    - scheduling class; the rm form uses sched.Folded.
// Both forms are marked as using MXCSR and as possibly raising FP exceptions.
// Each defm passes two patterns to AVX512_maskable_split: one built on
// X86any_cvtph2ps and one on X86cvtph2ps.
// NOTE(review): presumably the first is for the unmasked form and the second
// for the masked forms - confirm against AVX512_maskable_split.
8525let Uses = [MXCSR], mayRaiseFPException = 1 in
8526multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8527                           X86MemOperand x86memop, dag ld_dag,
8528                           X86FoldableSchedWrite sched> {
8529  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8530                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8531                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8532                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8533                            T8PD, Sched<[sched]>;
8534  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8535                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8536                            (X86any_cvtph2ps (_src.VT ld_dag)),
8537                            (X86cvtph2ps (_src.VT ld_dag))>,
8538                            T8PD, Sched<[sched.Folded]>;
8539}
8540
// avx512_cvtph2ps_sae - register-only {sae} form (rrb) of vcvtph2ps,
// opcode 0x13 with EVEX_B set, selected from X86cvtph2psSAE. The instruction
// is marked as using MXCSR; mayRaiseFPException is not set here.
8541multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8542                               X86FoldableSchedWrite sched> {
8543  let Uses = [MXCSR] in
8544  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8545                             (ins _src.RC:$src), "vcvtph2ps",
8546                             "{sae}, $src", "$src, {sae}",
8547                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8548                             T8PD, EVEX_B, Sched<[sched]>;
8549}
8550
// 512-bit vcvtph2ps (v16i16 -> v16f32) with a plain-load f256mem source, plus
// its {sae} register form. Requires HasAVX512.
8551let Predicates = [HasAVX512] in
8552  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8553                                    (load addr:$src), WriteCvtPH2PSZ>,
8554                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8555                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8556
// 256-bit and 128-bit vcvtph2ps under HasVLX. No {sae} form is instantiated
// for these widths. The 128-bit form uses an f64mem operand and matches the
// source as X86vzload64 (typed v2i64) bitconverted to the v8i16 source type.
8557let Predicates = [HasVLX] in {
8558  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8559                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8560                       EVEX_CD8<32, CD8VH>;
8561  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8562                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
8563                       WriteCvtPH2PS>, EVEX, EVEX_V128,
8564                       EVEX_CD8<32, CD8VH>;
8565
8566  // Pattern match vcvtph2ps of a scalar i64 load.
8567  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8568              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8569            (VCVTPH2PSZ128rm addr:$src)>;
8570}
8571
// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) with an 8-bit immediate operand.
//   _dest    - VT info of the half-precision result.
//   _src     - VT info of the single-precision source.
//   x86memop - memory operand for the store forms.
//   RR / MR  - scheduling classes for the register and store forms.
// Defines:
//   rr   - register destination, selected from X86any_cvtps2ph.
//   rrk  - merge-masked; $src0 is tied to $dst and is the passthru of
//          X86mcvtps2ph.
//   rrkz - zero-masked; X86mcvtps2ph with _dest.ImmAllZerosV as passthru.
//   mr   - store form, no pattern here.
//   mrk  - masked store form, no pattern, marked NotMemoryFoldable.
// All definitions are GenericDomain, use MXCSR and may raise FP exceptions.
8572multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8573                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8574let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8575  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8576             (ins _src.RC:$src1, i32u8imm:$src2),
8577             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8578             [(set _dest.RC:$dst,
8579                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8580             Sched<[RR]>;
8581  let Constraints = "$src0 = $dst" in
8582  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8583             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8584             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8585             [(set _dest.RC:$dst,
8586                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8587                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8588             Sched<[RR]>, EVEX_K;
8589  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8590             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8591             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8592             [(set _dest.RC:$dst,
8593                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8594                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8595             Sched<[RR]>, EVEX_KZ;
      // Store forms carry no selection pattern in this multiclass; users add
      // separate Pat<> records for the unmasked store.
8596  let hasSideEffects = 0, mayStore = 1 in {
8597    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8598               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8599               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8600               Sched<[MR]>;
        // NOTE(review): mrk takes its mask class from _dest.KRCWM, whereas
        // rrk/rrkz use _src.KRCWM. The two differ when _dest and _src have
        // different NumElts (e.g. v8i16x_info vs v4f32x_info) - confirm this
        // is intended.
8601    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8602               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8603               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8604                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8605  }
8606}
8607}
8608
// avx512_cvtps2ph_sae - register-only {sae} form (rrb) of vcvtps2ph,
// opcode 0x1D with EVEX_B set. It is built with AVX512_maskable_in_asm and an
// empty pattern list, so no selection pattern is attached here. Marked
// hasSideEffects = 0 and as using MXCSR.
8609multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8610                               SchedWrite Sched> {
8611  let hasSideEffects = 0, Uses = [MXCSR] in
8612  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8613                   (outs _dest.RC:$dst),
8614                   (ins _src.RC:$src1, i32u8imm:$src2),
8615                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8616                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8617}
8618
// 512-bit vcvtps2ph (v16f32 -> v16i16) including the {sae} register form,
// plus a pattern that folds a store of the v16i16 conversion result into the
// mr (store) form. Requires HasAVX512.
8619let Predicates = [HasAVX512] in {
8620  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8621                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8622                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8623                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8624
8625  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8626            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8627}
8628
// 256-bit and 128-bit vcvtps2ph under HasVLX; no {sae} form is instantiated
// for these widths. The store-folding patterns differ by width:
//  - 128-bit: the store form writes f64mem, so the patterns match a store of
//    element 0 of the v8i16 result reinterpreted as v2f64 or as v2i64.
//  - 256-bit: a store of the whole v8i16 result.
8629let Predicates = [HasVLX] in {
8630  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8631                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8632                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8633  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8634                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
8635                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8636
8637  def : Pat<(store (f64 (extractelt
8638                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8639                         (iPTR 0))), addr:$dst),
8640            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8641  def : Pat<(store (i64 (extractelt
8642                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8643                         (iPTR 0))), addr:$dst),
8644            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8645  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8646            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8647}
8648
8649// Unordered/ordered scalar FP compare with {sae} that sets EFLAGS.
// Defines only the register-register {sae} form (rrb) with an empty pattern
// list. EFLAGS is not listed as a def here; the instantiations wrap this
// multiclass in `let Defs = [EFLAGS]`.
// NOTE(review): the Domain parameter `d` is not referenced in the body.
8650multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8651                            string OpcodeStr, Domain d,
8652                            X86FoldableSchedWrite sched = WriteFComX> {
8653  let hasSideEffects = 0, Uses = [MXCSR] in
8654  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8655                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8656                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8657}
8658
// {sae} register forms of vucomiss/vucomisd (opcode 0x2E) and vcomiss/vcomisd
// (opcode 0x2F). All define EFLAGS and require HasAVX512. The double-precision
// variants add VEX_W and use a 64-bit EVEX_CD8 element size.
8659let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8660  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8661                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8662  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8663                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8664  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8665                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8666  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8667                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8668}
8669
// EVEX-encoded ucomis*/comis* compares that define EFLAGS, built from the
// shared sse12_ord_cmp / sse12_ord_cmp_int multiclasses. Requires HasAVX512.
//  - The FR32X/FR64X forms select from X86any_fcmp (ucomis*) and
//    X86strict_fcmps (comis*).
//  - The isCodeGenOnly VR128X forms select from X86ucomi / X86comi and use the
//    ssmem/sdmem operands with sse_load_f32/sse_load_f64.
// These defms reuse the same base names as the {sae} instantiations; the
// multiclasses contribute different record suffixes.
8670let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8671  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8672                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8673                                 EVEX_CD8<32, CD8VT1>;
8674  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8675                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
8676                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8677  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8678                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8679                                 EVEX_CD8<32, CD8VT1>;
8680  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8681                                 "comisd", SSEPackedDouble>, PD, EVEX,
8682                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8683  let isCodeGenOnly = 1 in {
8684    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8685                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8686                          EVEX_CD8<32, CD8VT1>;
8687    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8688                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8689                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8690
8691    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8692                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8693                          EVEX_CD8<32, CD8VT1>;
8694    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8695                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8696                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8697  }
8698}
8699
8700/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Two-source scalar forms: rr takes both sources in registers, rm takes the
/// second source from memory via _.IntScalarMemOp / _.ScalarIntMemFrags.
/// Both require HasAVX512, are marked as using MXCSR, and are built with
/// AVX512_maskable_scalar. No mayRaiseFPException flag is set here.
8701multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8702                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8703  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8704  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8705                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8706                           "$src2, $src1", "$src1, $src2",
8707                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8708                           EVEX_4V, VEX_LIG, Sched<[sched]>;
8709  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8710                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8711                         "$src2, $src1", "$src1, $src2",
8712                         (OpNode (_.VT _.RC:$src1),
8713                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
8714                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8715}
8716}
8717
// Scalar vrcp14ss/sd (opcode 0x4D) and vrsqrt14ss/sd (opcode 0x4F). The
// double-precision variants add VEX_W and use a 64-bit EVEX_CD8 element size.
8718defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8719                               f32x_info>, EVEX_CD8<32, CD8VT1>,
8720                               T8PD;
8721defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8722                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8723                               T8PD;
8724defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8725                                 SchedWriteFRsqrt.Scl, f32x_info>,
8726                                 EVEX_CD8<32, CD8VT1>, T8PD;
8727defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8728                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8729                                 EVEX_CD8<64, CD8VT1>, T8PD;
8730
8731/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Single-source packed forms for one vector type `_`:
///   r  - register source.
///   m  - full-vector memory source (_.LdFrag through bitconvert).
///   mb - broadcast memory source (_.BroadcastLdFrag), EVEX_B set, with
///        _.BroadcastStr appended to the operand in the asm string.
/// This multiclass sets no predicates and no MXCSR use itself.
8732multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8733                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8734  let ExeDomain = _.ExeDomain in {
8735  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8736                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8737                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8738                         Sched<[sched]>;
8739  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8740                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8741                         (OpNode (_.VT
8742                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8743                         Sched<[sched.Folded, sched.ReadAfterFold]>;
8744  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8745                          (ins _.ScalarMemOp:$src), OpcodeStr,
8746                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8747                          (OpNode (_.VT
8748                            (_.BroadcastLdFrag addr:$src)))>,
8749                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8750  }
8751}
8752
// avx512_fp14_p_vl_all - instantiates avx512_fp14_p for every packed width:
// 512-bit ps/pd unconditionally within this multiclass, and 128/256-bit ps/pd
// under HasVLX. "ps"/"pd" is appended to OpcodeStr; pd variants add VEX_W and
// a 64-bit EVEX_CD8 element size. Everything defined here uses MXCSR.
8753let Uses = [MXCSR] in
8754multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8755                                X86SchedWriteWidths sched> {
8756  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8757                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8758  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8759                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8760
8761  // Define only if AVX512VL feature is present.
8762  let Predicates = [HasVLX] in {
8763    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8764                                OpNode, sched.XMM, v4f32x_info>,
8765                               EVEX_V128, EVEX_CD8<32, CD8VF>;
8766    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8767                                OpNode, sched.YMM, v8f32x_info>,
8768                               EVEX_V256, EVEX_CD8<32, CD8VF>;
8769    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8770                                OpNode, sched.XMM, v2f64x_info>,
8771                               EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8772    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8773                                OpNode, sched.YMM, v4f64x_info>,
8774                               EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8775  }
8776}
8777
// Packed vrsqrt14ps/pd (opcode 0x4E) and vrcp14ps/pd (opcode 0x4C), all widths.
8778defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8779defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8780
8781/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Two-source scalar forms built with AVX512_maskable_scalar:
///   r  - register sources, selected from OpNode; tagged SIMD_EXC.
///   rb - register sources with {sae}, selected from OpNodeSAE; EVEX_B set
///        and not tagged SIMD_EXC.
///   m  - second source from memory (_.ScalarIntMemFrags); tagged SIMD_EXC.
/// All three are marked as using MXCSR.
8782multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8783                         SDNode OpNode, SDNode OpNodeSAE,
8784                         X86FoldableSchedWrite sched> {
8785  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8786  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8787                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8788                           "$src2, $src1", "$src1, $src2",
8789                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8790                           Sched<[sched]>, SIMD_EXC;
8791
8792  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8793                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8794                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8795                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8796                            EVEX_B, Sched<[sched]>;
8797
8798  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8799                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8800                         "$src2, $src1", "$src1, $src2",
8801                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
8802                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8803  }
8804}
8805
// avx512_eri_s - instantiates avx512_fp28_s for the scalar single (SSZ, "ss"
// suffix, f32x_info) and double (SDZ, "sd" suffix, f64x_info, VEX_W)
// variants. Both are VEX_LIG.
8806multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8807                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8808  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8809                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8810  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8811                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8812}
8813
// Scalar vrcp28ss/sd (opcode 0xCB) and vrsqrt28ss/sd (opcode 0xCD); these two
// are gated on HasERI.
8814let Predicates = [HasERI] in {
8815  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8816                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8817  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8818                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8819}
8820
// Scalar vgetexpss/sd (opcode 0x43) reuses the same multiclass but is defined
// outside the HasERI block, so it does not get the HasERI predicate.
8821defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8822                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8823/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd, exp2ps, exp2pd, getexpps, getexppd
/// Single-source packed forms for one vector type `_`:
///   r  - register source.
///   m  - full-vector memory source (_.LdFrag through bitconvert).
///   mb - broadcast memory source (_.BroadcastLdFrag), EVEX_B set.
/// All three use MXCSR and are marked mayRaiseFPException. The EVEX prefix,
/// opcode map and vector length are supplied by the instantiating defm.
8824
8825multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8826                         SDNode OpNode, X86FoldableSchedWrite sched> {
8827  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8828  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8829                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8830                         (OpNode (_.VT _.RC:$src))>,
8831                         Sched<[sched]>;
8832
8833  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8834                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8835                         (OpNode (_.VT
8836                             (bitconvert (_.LdFrag addr:$src))))>,
8837                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8838
8839  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8840                         (ins _.ScalarMemOp:$src), OpcodeStr,
8841                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8842                         (OpNode (_.VT
8843                                  (_.BroadcastLdFrag addr:$src)))>,
8844                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8845  }
8846}
8846}
// avx512_fp28_p_sae - register-only {sae} packed form (rb) for one vector
// type `_`, with EVEX_B set. OpNode is expected to be the SAE variant of the
// operation (avx512_eri passes OpNodeSAE). Uses MXCSR; mayRaiseFPException is
// not set here.
8847multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8848                         SDNode OpNode, X86FoldableSchedWrite sched> {
8849  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8850  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8851                        (ins _.RC:$src), OpcodeStr,
8852                        "{sae}, $src", "$src, {sae}",
8853                        (OpNode (_.VT _.RC:$src))>,
8854                        EVEX_B, Sched<[sched]>;
8855}
8856
// avx512_eri - 512-bit packed single (PSZ) and double (PDZ) variants of an
// operation, each combining the normal forms (avx512_fp28_p with OpNode) and
// the {sae} form (avx512_fp28_p_sae with OpNodeSAE). Only the ZMM scheduling
// class of `sched` is used.
8857multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8858                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8859   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8860              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8861              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8862   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8863              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8864              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8865}
8866
// avx512_fp_unaryop_packed - the 128-bit and 256-bit ps/pd variants of a
// packed unary operation, built from avx512_fp28_p and gated on HasVLX.
// No {sae} forms are defined for these widths.
8867multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8868                                  SDNode OpNode, X86SchedWriteWidths sched> {
8869  // Define only if AVX512VL feature is present.
8870  let Predicates = [HasVLX] in {
8871    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8872                                sched.XMM>,
8873                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8874    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8875                                sched.YMM>,
8876                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8877    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8878                                sched.XMM>,
8879                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8880    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8881                                sched.YMM>,
8882                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8883  }
8884}
8885
// 512-bit packed vrsqrt28 (0xCC), vrcp28 (0xCA) and vexp2 (0xC8), gated on
// HasERI. Only the 512-bit forms are instantiated for these three.
8886let Predicates = [HasERI] in {
8887 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8888                            SchedWriteFRsqrt>, EVEX;
8889 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8890                            SchedWriteFRcp>, EVEX;
8891 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8892                            SchedWriteFAdd>, EVEX;
8893}
// Packed vgetexp (0x42) is defined outside the HasERI block. It gets the
// 512-bit forms from avx512_eri and the 128/256-bit forms (under HasVLX) from
// avx512_fp_unaryop_packed.
8894defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8895                            SchedWriteFRnd>,
8896                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8897                                          SchedWriteFRnd>, EVEX;
8898
// avx512_sqrt_packed_round - register-only packed sqrt form (rb) with an
// explicit rounding-control operand $rc (AVX512RC), selected from
// X86fsqrtRnd. Sets EVEX_B and EVEX_RC. MXCSR use is not set in the body;
// avx512_sqrt_packed_all_round wraps its instantiations in
// `let Uses = [MXCSR]`.
8899multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8900                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8901  let ExeDomain = _.ExeDomain in
8902  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8903                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8904                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8905                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8906}
8907
// avx512_sqrt_packed - packed sqrt for one vector type `_`:
//   r  - register source.
//   m  - full-vector memory source (_.LdFrag).
//   mb - broadcast memory source (_.BroadcastLdFrag), EVEX_B set.
// Each passes two patterns to AVX512_maskable_split: one on any_fsqrt and one
// on fsqrt.
// NOTE(review): presumably unmasked vs. masked selection - confirm against
// AVX512_maskable_split.
// All forms use MXCSR and are marked mayRaiseFPException.
8908multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8909                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8910  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8911  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
8912                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
8913                         (_.VT (any_fsqrt _.RC:$src)),
8914                         (_.VT (fsqrt _.RC:$src))>, EVEX,
8915                         Sched<[sched]>;
8916  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8917                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8918                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
8919                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
8920                         Sched<[sched.Folded, sched.ReadAfterFold]>;
8921  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8922                          (ins _.ScalarMemOp:$src), OpcodeStr,
8923                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8924                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
8925                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
8926                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8927  }
8928}
8929
// avx512_sqrt_packed_all - instantiates avx512_sqrt_packed for every packed
// width: 512-bit ps/pd unconditionally within this multiclass, and
// 128/256-bit ps/pd under HasVLX. The ps variants use the PS prefix and
// sched.PS.*; the pd variants use PD, VEX_W and sched.PD.*.
// The outer `let` repeats the Uses/mayRaiseFPException settings that
// avx512_sqrt_packed also applies internally.
8930let Uses = [MXCSR], mayRaiseFPException = 1 in
8931multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8932                                  X86SchedWriteSizes sched> {
8933  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8934                                sched.PS.ZMM, v16f32_info>,
8935                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8936  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8937                                sched.PD.ZMM, v8f64_info>,
8938                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8939  // Define only if AVX512VL feature is present.
8940  let Predicates = [HasVLX] in {
8941    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8942                                     sched.PS.XMM, v4f32x_info>,
8943                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8944    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8945                                     sched.PS.YMM, v8f32x_info>,
8946                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8947    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8948                                     sched.PD.XMM, v2f64x_info>,
8949                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8950    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8951                                     sched.PD.YMM, v4f64x_info>,
8952                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8953  }
8954}
8955
// avx512_sqrt_packed_all_round - the explicit-rounding sqrt forms, defined
// only for the 512-bit ps and pd vector types. Everything defined here uses
// MXCSR; mayRaiseFPException is not set.
8956let Uses = [MXCSR] in
8957multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8958                                        X86SchedWriteSizes sched> {
8959  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8960                                      sched.PS.ZMM, v16f32_info>,
8961                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8962  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8963                                      sched.PD.ZMM, v8f64_info>,
8964                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8965}
8966
// avx512_sqrt_scalar - scalar sqrt for one scalar VT info `_`.
//   Name - base record name used to look up the FRC instructions in the
//          patterns below (Name#Zr / Name#Zm).
// Defines:
//   r_Int / m_Int - vector-register forms selected from X86fsqrts; m_Int takes
//                   the second source from memory. Both tagged SIMD_EXC.
//   rb_Int        - register form with rounding-control operand $rc, selected
//                   from X86fsqrtRnds; EVEX_B and EVEX_RC set, uses MXCSR.
//   r / m         - isCodeGenOnly forms on _.FRC with no patterns attached;
//                   selected through the Pat<> records at the end.
8967multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8968                              X86VectorVTInfo _, string Name> {
8969  let ExeDomain = _.ExeDomain in {
8970    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8971                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8972                         "$src2, $src1", "$src1, $src2",
8973                         (X86fsqrts (_.VT _.RC:$src1),
8974                                    (_.VT _.RC:$src2))>,
8975                         Sched<[sched]>, SIMD_EXC;
8976    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8977                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8978                         "$src2, $src1", "$src1, $src2",
8979                         (X86fsqrts (_.VT _.RC:$src1),
8980                                    (_.ScalarIntMemFrags addr:$src2))>,
8981                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8982    let Uses = [MXCSR] in
8983    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8984                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8985                         "$rc, $src2, $src1", "$src1, $src2, $rc",
8986                         (X86fsqrtRnds (_.VT _.RC:$src1),
8987                                     (_.VT _.RC:$src2),
8988                                     (i32 timm:$rc))>,
8989                         EVEX_B, EVEX_RC, Sched<[sched]>;
8990
8991    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8992      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8993                (ins _.FRC:$src1, _.FRC:$src2),
8994                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8995                Sched<[sched]>, SIMD_EXC;
8996      let mayLoad = 1 in
8997        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8998                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8999                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9000                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9001    }
9002  }
9003
      // Plain scalar any_fsqrt selects the FRC register form; the first source
      // operand is filled with IMPLICIT_DEF.
9004  let Predicates = [HasAVX512] in {
9005    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9006              (!cast<Instruction>(Name#Zr)
9007                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9008  }
9009
      // The load is folded into the FRC memory form only under OptForSize.
9010  let Predicates = [HasAVX512, OptForSize] in {
9011    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9012              (!cast<Instruction>(Name#Zm)
9013                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9014  }
9015}
9016
// avx512_sqrt_scalar_all - instantiates avx512_sqrt_scalar for single (SSZ,
// XS prefix) and double (SDZ, XD prefix, VEX_W) precision. NAME#"SS" /
// NAME#"SD" is passed as the Name argument, which avx512_sqrt_scalar uses to
// form the Zr/Zm instruction names in its patterns.
9017multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9018                                  X86SchedWriteSizes sched> {
9019  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9020                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9021  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9022                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9023}
9024
// vsqrt (opcode 0x51): packed forms for all widths plus the 512-bit
// explicit-rounding forms, followed by the scalar ss/sd forms (VEX_LIG).
9025defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9026             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9027
9028defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9029
// Scalar VRNDSCALE definitions for the element type described by `_`.
//
// Records produced:
//   r_Int / rb_Int / m_Int - maskable forms operating on _.RC (register
//                            source, register source with {sae}, and memory
//                            source respectively).
//   r / m                  - isCodeGenOnly forms operating on _.FRC; they
//                            carry no pattern of their own.
// The trailing anonymous patterns select X86any_VRndScale to the r / m forms.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register-source form.
    defm r_Int : AVX512_maskable_scalar<
                   opc, MRMSrcReg, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                   OpcodeStr,
                   "$src3, $src2, $src1",
                   "$src1, $src2, $src3",
                   (_.VT (X86RndScales (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (i32 timm:$src3)))>,
                 Sched<[sched]>, SIMD_EXC;

    // {sae} form: sets EVEX_B and lists MXCSR in Uses (no SIMD_EXC here).
    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<
                    opc, MRMSrcReg, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                    OpcodeStr,
                    "$src3, {sae}, $src2, $src1",
                    "$src1, $src2, {sae}, $src3",
                    (_.VT (X86RndScalesSAE (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2),
                                          (i32 timm:$src3)))>,
                  EVEX_B, Sched<[sched]>;

    // Memory-source form; the second source is matched via ScalarIntMemFrags.
    defm m_Int : AVX512_maskable_scalar<
                   opc, MRMSrcMem, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                   OpcodeStr,
                   "$src3, $src2, $src1",
                   "$src1, $src2, $src3",
                   (_.VT (X86RndScales _.RC:$src1,
                                       (_.ScalarIntMemFrags addr:$src2),
                                       (i32 timm:$src3)))>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

    // Pattern-less forms on the scalar FP register class; they are selected
    // only through the explicit Pat records below.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg,
                (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>,
              Sched<[sched]>, SIMD_EXC;

      let mayLoad = 1 in
      def m : I<opc, MRMSrcMem,
                (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  // Register operand: the first instruction source is an IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r)
                          (_.EltVT (IMPLICIT_DEF)),
                          _.FRC:$src1, timm:$src2))>;
  }

  // Loaded operand: this pattern is additionally gated on OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m)
                          (_.EltVT (IMPLICIT_DEF)),
                          addr:$src1, timm:$src2))>;
  }
}
9082
// Scalar round-scale instantiations: opcode 0x0A with f32x_info, and opcode
// 0x0B with f64x_info (the latter additionally sets VEX_W).
defm VRNDSCALESSZ :
    avx512_rndscale_scalar<0x0A, "vrndscaless", SchedWriteFRnd.Scl, f32x_info>,
    AVX512AIi8Base, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ :
    avx512_rndscale_scalar<0x0B, "vrndscalesd", SchedWriteFRnd.Scl, f64x_info>,
    VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
9092
// Selection patterns for a masked scalar operation written as
//   Move(src1, scalar_to_vector(X86selects_mask(Mask, OpNode(src2[0]), passthru)))
// The instruction chosen is "V" # OpcPrefix # r_Intk when the pass-through is
// element 0 of $dst, and "V" # OpcPrefix # r_Intkz when it is ZeroFP.
// OutMask is the dag placed in the instruction's mask operand position.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Pass-through comes from $dst, which is also passed as the first operand.
    def : Pat<(Move _.VT:$src1,
                    (scalar_to_vector
                       (X86selects_mask Mask,
                          (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                          (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
                 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    // Pass-through is the FP zero leaf; no $dst operand is passed.
    def : Pat<(Move _.VT:$src1,
                    (scalar_to_vector
                       (X86selects_mask Mask,
                          (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                          ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
                 OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
9110
// Masked scalar fsqrt patterns. The mask is matched as a GR32 value truncated
// to i8 and converted to v1i1; on the output side $mask is copied to VK1WM.
defm : avx512_masked_scalar<
           fsqrt, "SQRTSSZ", X86Movss,
           (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))),
           v4f32x_info, fp32imm0,
           (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<
           fsqrt, "SQRTSDZ", X86Movsd,
           (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))),
           v2f64x_info, fp64imm0,
           (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9117
9118
9119//-------------------------------------------------
9120// Integer truncate and extend operations
9121//-------------------------------------------------
9122
// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
// Operand order for all three fragments: (source, pass-through, mask).
// Each one wraps a truncation node in vselect_mask.

// Plain trunc.
def select_trunc :
    PatFrag<(ops node:$src, node:$src0, node:$mask),
            (vselect_mask node:$mask, (trunc node:$src), node:$src0)>;

// X86vtruncs.
def select_truncs :
    PatFrag<(ops node:$src, node:$src0, node:$mask),
            (vselect_mask node:$mask, (X86vtruncs node:$src), node:$src0)>;

// X86vtruncus.
def select_truncus :
    PatFrag<(ops node:$src, node:$src0, node:$mask),
            (vselect_mask node:$mask, (X86vtruncus node:$src), node:$src0)>;
9135
// Truncate instruction forms for one (SrcInfo -> DestInfo) pairing.
//
//   rr   - register to register, selected via OpNode.
//   rrk  - masked, with $src0 tied to $dst as pass-through, via MaskNode.
//   rrkz - masked with DestInfo.ImmAllZerosV as pass-through, via MaskNode.
//   mr   - store form, no pattern.
//   mrk  - masked store form, no pattern, marked NotMemoryFoldable.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDPatternOperator MaskNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
    def rr : AVX512XS8I<opc, MRMDestReg,
                        (outs DestInfo.RC:$dst),
                        (ins SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [(set DestInfo.RC:$dst,
                              (DestInfo.VT
                                 (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;

    let Constraints = "$src0 = $dst" in
    def rrk : AVX512XS8I<opc, MRMDestReg,
                         (outs DestInfo.RC:$dst),
                         (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask,
                              SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                         [(set DestInfo.RC:$dst,
                               (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                         (DestInfo.VT DestInfo.RC:$src0),
                                         SrcInfo.KRCWM:$mask))]>,
              EVEX, EVEX_K, Sched<[sched]>;

    def rrkz : AVX512XS8I<opc, MRMDestReg,
                          (outs DestInfo.RC:$dst),
                          (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                          OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                          [(set DestInfo.RC:$dst,
                                (DestInfo.VT
                                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                             DestInfo.ImmAllZerosV,
                                             SrcInfo.KRCWM:$mask)))]>,
               EVEX, EVEX_KZ, Sched<[sched]>;
  }

  // Store forms carry empty pattern lists, so mayStore/hasSideEffects are
  // stated explicitly.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem,
                        (outs),
                        (ins x86memop:$dst, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        []>,
             EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem,
                         (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask,
                              SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                         []>,
              EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } // mayStore = 1, hasSideEffects = 0
}
9177
// Patterns mapping truncating-store fragments onto the mr / mrk records named
// Name # SrcInfo.ZSuffix # {mr, mrk}.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {
  // Unmasked truncating store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
               addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store.
  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
               addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
9192
// Expands one truncate opcode into its Z128 / Z256 / Z records, each combining
// avx512_trunc_common with avx512_trunc_mr_lowering. Every vector width gets
// its own unmasked node, masked node, destination info and memory operand.
// The Z128 and Z256 records require HasVLX in addition to prd; the Z records
// require prd alone.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128,
                                    sched, VTSrcInfo.info128, DestInfoZ128,
                                    x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                         truncFrag, mtruncFrag, NAME>,
                EVEX_V128;

    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256,
                                    sched, VTSrcInfo.info256, DestInfoZ256,
                                    x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                         truncFrag, mtruncFrag, NAME>,
                EVEX_V256;
  }

  let Predicates = [prd] in
  defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512,
                               sched, VTSrcInfo.info512, DestInfoZ,
                               x86memopZ>,
           avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                    truncFrag, mtruncFrag, NAME>,
           EVEX_V512;
}
9223
// Truncation from avx512vl_i64_info sources to v16i8x_info at every width.
// All three widths use InVecNode / InVecMaskNode; OpNode and MaskNode are
// accepted but not referenced in this multiclass.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr,
                          // unmasked nodes: 128 / 256 / 512
                          InVecNode, InVecNode, InVecNode,
                          // masked nodes: 128 / 256 / 512
                          InVecMaskNode, InVecMaskNode, InVecMaskNode,
                          sched, avx512vl_i64_info,
                          // destination infos: 128 / 256 / 512
                          v16i8x_info, v16i8x_info, v16i8x_info,
                          // memory operands: 128 / 256 / 512
                          i16mem, i32mem, i64mem,
                          StoreNode, MaskedStoreNode>,
             EVEX_CD8<8, CD8VO>;
}
9235
// Truncation from avx512vl_i64_info sources to v8i16x_info at every width.
// The 128- and 256-bit forms use InVecNode / InVecMaskNode; the 512-bit form
// uses OpNode / MaskNode.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr,
                          // unmasked nodes: 128 / 256 / 512
                          InVecNode, InVecNode, OpNode,
                          // masked nodes: 128 / 256 / 512
                          InVecMaskNode, InVecMaskNode, MaskNode,
                          sched, avx512vl_i64_info,
                          // destination infos: 128 / 256 / 512
                          v8i16x_info, v8i16x_info, v8i16x_info,
                          // memory operands: 128 / 256 / 512
                          i32mem, i64mem, i128mem,
                          StoreNode, MaskedStoreNode>,
             EVEX_CD8<16, CD8VQ>;
}
9247
// Truncation from avx512vl_i64_info sources to v4i32x_info (128/256-bit) or
// v8i32x_info (512-bit). Only the 128-bit form uses InVecNode /
// InVecMaskNode; the wider forms use OpNode / MaskNode.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr,
                          // unmasked nodes: 128 / 256 / 512
                          InVecNode, OpNode, OpNode,
                          // masked nodes: 128 / 256 / 512
                          InVecMaskNode, MaskNode, MaskNode,
                          sched, avx512vl_i64_info,
                          // destination infos: 128 / 256 / 512
                          v4i32x_info, v4i32x_info, v8i32x_info,
                          // memory operands: 128 / 256 / 512
                          i64mem, i128mem, i256mem,
                          StoreNode, MaskedStoreNode>,
             EVEX_CD8<32, CD8VH>;
}
9259
// Truncation from avx512vl_i32_info sources to v16i8x_info at every width.
// The 128- and 256-bit forms use InVecNode / InVecMaskNode; the 512-bit form
// uses OpNode / MaskNode.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr,
                          // unmasked nodes: 128 / 256 / 512
                          InVecNode, InVecNode, OpNode,
                          // masked nodes: 128 / 256 / 512
                          InVecMaskNode, InVecMaskNode, MaskNode,
                          sched, avx512vl_i32_info,
                          // destination infos: 128 / 256 / 512
                          v16i8x_info, v16i8x_info, v16i8x_info,
                          // memory operands: 128 / 256 / 512
                          i32mem, i64mem, i128mem,
                          StoreNode, MaskedStoreNode>,
             EVEX_CD8<8, CD8VQ>;
}
9271
// Truncation from avx512vl_i32_info sources to v8i16x_info (128/256-bit) or
// v16i16x_info (512-bit). Only the 128-bit form uses InVecNode /
// InVecMaskNode; the wider forms use OpNode / MaskNode.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr,
                          // unmasked nodes: 128 / 256 / 512
                          InVecNode, OpNode, OpNode,
                          // masked nodes: 128 / 256 / 512
                          InVecMaskNode, MaskNode, MaskNode,
                          sched, avx512vl_i32_info,
                          // destination infos: 128 / 256 / 512
                          v8i16x_info, v8i16x_info, v16i16x_info,
                          // memory operands: 128 / 256 / 512
                          i64mem, i128mem, i256mem,
                          StoreNode, MaskedStoreNode>,
             EVEX_CD8<16, CD8VH>;
}
9283
// Truncation from avx512vl_i16_info sources to v16i8x_info (128/256-bit) or
// v32i8x_info (512-bit). Only the 128-bit form uses InVecNode /
// InVecMaskNode; the wider forms use OpNode / MaskNode. HasBWI is passed as
// the base predicate in place of avx512_trunc's default.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr,
                          // unmasked nodes: 128 / 256 / 512
                          InVecNode, OpNode, OpNode,
                          // masked nodes: 128 / 256 / 512
                          InVecMaskNode, MaskNode, MaskNode,
                          sched, avx512vl_i16_info,
                          // destination infos: 128 / 256 / 512
                          v16i8x_info, v16i8x_info, v32i8x_info,
                          // memory operands: 128 / 256 / 512
                          i64mem, i128mem, i256mem,
                          StoreNode, MaskedStoreNode, HasBWI>,
             EVEX_CD8<16, CD8VH>;
}
9295
// Truncate instruction instantiations. Each source/destination pairing comes
// in three flavours that differ in the nodes passed: trunc, X86vtruncs ("s"
// mnemonic) and X86vtruncus ("us" mnemonic).
// Argument order: opcode, mnemonic, OpNode, MaskNode, sched, StoreNode,
// MaskedStoreNode, InVecNode, InVecMaskNode.

// i64 source, v16i8 destination.
defm VPMOVQB   : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
                                 WriteShuffle256,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc, X86vmtrunc>;
defm VPMOVSQB  : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
                                 WriteShuffle256,
                                 truncstore_s_vi8, masked_truncstore_s_vi8,
                                 X86vtruncs, X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, select_truncus,
                                 WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

// i64 source, v8i16 destination.
defm VPMOVQW   : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                                 WriteShuffle256,
                                 truncstorevi16, masked_truncstorevi16,
                                 X86vtrunc, X86vmtrunc>;
defm VPMOVSQW  : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                 WriteShuffle256,
                                 truncstore_s_vi16, masked_truncstore_s_vi16,
                                 X86vtruncs, X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, select_truncus,
                                 WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

// i64 source, i32 destination.
defm VPMOVQD   : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                                 WriteShuffle256,
                                 truncstorevi32, masked_truncstorevi32,
                                 X86vtrunc, X86vmtrunc>;
defm VPMOVSQD  : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                 WriteShuffle256,
                                 truncstore_s_vi32, masked_truncstore_s_vi32,
                                 X86vtruncs, X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, select_truncus,
                                 WriteShuffle256,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

// i32 source, v16i8 destination.
defm VPMOVDB   : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                 WriteShuffle256,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc, X86vmtrunc>;
defm VPMOVSDB  : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                 WriteShuffle256,
                                 truncstore_s_vi8, masked_truncstore_s_vi8,
                                 X86vtruncs, X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, select_truncus,
                                 WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

// i32 source, i16 destination.
defm VPMOVDW   : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                 WriteShuffle256,
                                 truncstorevi16, masked_truncstorevi16,
                                 X86vtrunc, X86vmtrunc>;
defm VPMOVSDW  : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                 WriteShuffle256,
                                 truncstore_s_vi16, masked_truncstore_s_vi16,
                                 X86vtruncs, X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, select_truncus,
                                 WriteShuffle256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

// i16 source, i8 destination.
defm VPMOVWB   : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                 WriteShuffle256,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc, X86vmtrunc>;
defm VPMOVSWB  : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                 WriteShuffle256,
                                 truncstore_s_vi8, masked_truncstore_s_vi8,
                                 X86vtruncs, X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, select_truncus,
                                 WriteShuffle256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;
9368
// 256-bit truncates under NoVLX: the VR256X source is inserted at sub_ymm into
// an IMPLICIT_DEF of the 512-bit type, the 512-bit (Z) instruction is applied,
// and the sub_xmm part of its result is extracted.
let Predicates = [HasAVX512, NoVLX] in {
  // v8i32 -> v8i16 through VPMOVDWZrr.
  def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
           (v8i16 (EXTRACT_SUBREG
                     (v16i16 (VPMOVDWZrr
                                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                                       VR256X:$src, sub_ymm)))),
                     sub_xmm))>;

  // v4i64 -> v4i32 through VPMOVQDZrr.
  def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
           (v4i32 (EXTRACT_SUBREG
                     (v8i32 (VPMOVQDZrr
                               (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                                     VR256X:$src, sub_ymm)))),
                     sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
  // v16i16 -> v16i8 through VPMOVWBZrr.
  def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
           (v16i8 (EXTRACT_SUBREG
                     (VPMOVWBZrr
                        (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src, sub_ymm))),
                     sub_xmm))>;
}
9385
// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps a masked-truncate node directly onto InstrName#"rrk" (register
// pass-through) or InstrName#"rrkz" (all-zeros pass-through).
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  // Pass-through supplied in a register.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk")
               DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;

  // Pass-through is the all-zeros vector.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz")
               SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
9403
// Masked-truncate lowerings, one per (instruction, node) pairing.

// v8i32 -> v8i16 with the Z256 instructions; gated on HasVLX.
let Predicates = [HasVLX] in {
  defm : mtrunc_lowering<"VPMOVDWZ256",   X86vmtrunc,   v8i16x_info, v8i32x_info>;
  defm : mtrunc_lowering<"VPMOVSDWZ256",  X86vmtruncs,  v8i16x_info, v8i32x_info>;
  defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

// 512-bit sources; gated on HasAVX512.
let Predicates = [HasAVX512] in {
  // v16i32 -> v16i16
  defm : mtrunc_lowering<"VPMOVDWZ",   X86vmtrunc,   v16i16x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVSDWZ",  X86vmtruncs,  v16i16x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

  // v16i32 -> v16i8
  defm : mtrunc_lowering<"VPMOVDBZ",   X86vmtrunc,   v16i8x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVSDBZ",  X86vmtruncs,  v16i8x_info, v16i32_info>;
  defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

  // v8i64 -> v8i16
  defm : mtrunc_lowering<"VPMOVQWZ",   X86vmtrunc,   v8i16x_info, v8i64_info>;
  defm : mtrunc_lowering<"VPMOVSQWZ",  X86vmtruncs,  v8i16x_info, v8i64_info>;
  defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
9423
// Shared body of the extend instructions: a maskable register form (rr) that
// applies OpNode to a SrcInfo register, and a maskable memory form (rm) that
// matches LdFrag on the address.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins SrcInfo.RC:$src),
                              OpcodeStr, "$src", "$src",
                              (DestInfo.VT
                                 (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
              EVEX, Sched<[sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins x86memop:$src),
                              OpcodeStr, "$src", "$src",
                              (DestInfo.VT (LdFrag addr:$src))>,
              EVEX, Sched<[sched.Folded]>;
  }
}
9439
// i8 -> i16 extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi8" fragment. The 128-bit form uses InVecNode; the 256- and
// 512-bit forms use OpNode. All forms are gated on HasBWI (plus HasVLX below
// 512 bits).
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v8i16x_info, v16i8x_info,
                                       i64mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v16i16x_info, v16i8x_info,
                                       i128mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched,
                                    v32i16_info, v32i8x_info,
                                    i256mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9458
// i8 -> i32 extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi8" fragment. The 128- and 256-bit forms use InVecNode; the
// 512-bit form uses OpNode.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v4i32x_info, v16i8x_info,
                                       i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v8i32x_info, v16i8x_info,
                                       i64mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched,
                                    v16i32_info, v16i8x_info,
                                    i128mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9477
// i8 -> i64 extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi8" fragment. Every width uses InVecNode; OpNode is accepted
// but not referenced in this multiclass.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v2i64x_info, v16i8x_info,
                                       i16mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v4i64x_info, v16i8x_info,
                                       i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched,
                                    v8i64_info, v16i8x_info,
                                    i64mem, LdFrag, InVecNode>,
             EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9496
// i16 -> i32 extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi16" fragment. The 128-bit form uses InVecNode; the 256- and
// 512-bit forms use OpNode.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v4i32x_info, v8i16x_info,
                                       i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v8i32x_info, v8i16x_info,
                                       i128mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched,
                                    v16i32_info, v16i16x_info,
                                    i256mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9515
// i16 -> i64 extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi16" fragment. The 128- and 256-bit forms use InVecNode; the
// 512-bit form uses OpNode.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v2i64x_info, v8i16x_info,
                                       i32mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v4i64x_info, v8i16x_info,
                                       i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched,
                                    v8i64_info, v8i16x_info,
                                    i128mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9534
// i32 -> i64 extension at 128/256/512 bits. LdFrag defaults to the
// ExtTy#"extloadvi32" fragment. The 128-bit form uses InVecNode; the 256- and
// 512-bit forms use OpNode. No VEX_WIG is applied in this multiclass.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v2i64x_info, v4i32x_info,
                                       i64mem, LdFrag, InVecNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : WriteShuffle256_common<opc, OpcodeStr, sched,
                                       v4i64x_info, v4i32x_info,
                                       i128mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : WriteShuffle256_common<opc, OpcodeStr, sched,
                                    v8i64_info, v8i32x_info,
                                    i256mem, LdFrag, OpNode>,
             EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
9554
// Extend instruction instantiations. Argument order: opcode, mnemonic, OpNode,
// InVecNode, ExtTy, sched. ExtTy selects the default load fragment prefix.

// zext / zext_invec family, opcodes 0x30-0x35.
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z",
                                    WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z",
                                    WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z",
                                    WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z",
                                    WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z",
                                    WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z",
                                    WriteShuffle256>;

// sext / sext_invec family, opcodes 0x20-0x25.
defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s",
                                   WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s",
                                   WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s",
                                   WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s",
                                   WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s",
                                   WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s",
                                   WriteShuffle256>;
9568
9569
// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
//
// Each pattern folds a full-vector load feeding ExtOp into the memory (rm)
// form of the instruction named OpcPrefix # <type pair> # Z[256]rm.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // ---- 256-bit results ----
  let Predicates = [HasVLX, HasBWI] in {
    // v16i8 -> v16i16
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    // v8i16 -> v8i32
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    // v4i32 -> v4i64
    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // ---- 512-bit results ----
  let Predicates = [HasBWI] in {
    // v32i8 -> v32i16
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    // v16i8 -> v16i32
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    // v16i16 -> v16i32
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    // v8i16 -> v8i64
    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    // v8i32 -> v8i64
    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
// Load-folding patterns for the in-vector extension node InVecOp, layered on
// top of the full-vector patterns inherited from AVX512_pmovx_patterns_base.
//
// The operand of InVecOp is matched as a narrow scalar load placed into a
// vector (scalar_to_vector) or as an X86vzload node, bitcast to the element
// type being extended; the load is folded into the "rm" form of the selected
// instruction. 64-bit scalar loads are matched in both their integer
// (v2i64 / loadi64) and floating-point (v2f64 / loadf64) spellings.
//
// NOTE: the loadf64 variants must be typed v2f64. Two 256-bit patterns used to
// spell the vector type as v2i64, unlike the 128-bit and 512-bit groups; a
// scalar_to_vector of an f64 load is a v2f64 node, so those variants could not
// match and the load was not folded.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // f64 load: the vector type is v2f64, as in the 128-bit and 512-bit groups.
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // f64 load: the vector type is v2f64, as in the 128-bit and 512-bit groups.
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

// Instantiate the patterns for the sign-extending and zero-extending moves.
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9679
// A v16i16 -> v16i8 trunc cannot be done directly without BWI, and DAG combine
// can merge ext+trunc so aggressively that the DAG cannot be legalized to this
// shape beforehand. Select it here instead: zero-extend the words to v16i32
// (VPMOVZXWD), then truncate the dwords to bytes (VPMOVDB).
let Predicates = [HasAVX512, NoBWI] in {
  // Register source.
  def : Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
  // Loaded source: the load is folded into the extend.
  def : Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
9689
9690//===----------------------------------------------------------------------===//
9691// GATHER - SCATTER Operations
9692
// FIXME: Improve scheduling of gather/scatter instructions.
//
// Defines the single "rm" form of a gather. The destination is tied to $src1
// and marked early-clobber, and the mask is both an input ($mask) and a tied
// output ($mask_wb). No selection pattern is attached.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; _.Suffix is appended to it.
//   memop     - memory operand class used for $src2.
//   MaskRC    - mask register class; defaults to the write-mask class of _.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let mayLoad = 1, hasSideEffects = 0, ExeDomain = _.ExeDomain,
      Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs _.RC:$dst, MaskRC:$mask_wb),
                      (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                      OpcodeStr # _.Suffix #
                        "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                      []>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
  }
}
9704
// Gather variants whose instantiations all carry VEX_W. For every vector
// length there is a "D" form (opcode dopc, mnemonic suffix "d") and a "Q" form
// (opcode qopc, mnemonic suffix "q"); the two differ in the memory operand
// class they use. The 128/256-bit forms additionally require VLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // 512-bit forms.
  defm NAME#D#SUFF#Z : avx512_gather<dopc, OpcodeStr#"d", _.info512, vy512xmem>,
                       EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z : avx512_gather<qopc, OpcodeStr#"q", _.info512, vz512mem>,
                       EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    // 256-bit forms.
    defm NAME#D#SUFF#Z256 : avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vx256xmem>,
                            EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256 : avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                          vy256xmem>,
                            EVEX_V256, VEX_W;
    // 128-bit forms.
    defm NAME#D#SUFF#Z128 : avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>,
                            EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx128xmem>,
                            EVEX_V128, VEX_W;
  }
}
9722
// Gather variants instantiated without VEX_W. The "D" forms use the vector
// info matching their encoded vector length. The "Q" forms use a narrower
// info than the encoded length (info256 under EVEX_V512, info128 under
// EVEX_V256), and the 128-bit "Q" form overrides the mask class with VK2WM.
// The 128/256-bit forms additionally require VLX.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // 512-bit encodings.
  defm NAME#D#SUFF#Z : avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                       EVEX_V512;
  defm NAME#Q#SUFF#Z : avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                       EVEX_V512;

  let Predicates = [HasVLX] in {
    // 256-bit encodings.
    defm NAME#D#SUFF#Z256 : avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vy256xmem>,
                            EVEX_V256;
    defm NAME#Q#SUFF#Z256 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vy128xmem>,
                            EVEX_V256;
    // 128-bit encodings.
    defm NAME#D#SUFF#Z128 : avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>,
                            EVEX_V128;
    defm NAME#Q#SUFF#Z128 : avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx64xmem, VK2WM>,
                            EVEX_V128;
  }
}
9740
9741
// Floating-point gathers: PD via the q_pd family, PS via the d_ps family.
defm VGATHER :
    avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
    avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

// Integer gathers: Q via the q_pd family, D via the d_ps family.
defm VPGATHER :
    avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
    avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9747
// Defines the single "mr" form of a scatter. The mask is both an input
// ($mask) and a tied output ($mask_wb). No selection pattern is attached.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; _.Suffix is appended to it.
//   memop     - memory operand class used for $dst.
//   MaskRC    - mask register class; defaults to the write-mask class of _.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let hasSideEffects = 0, mayStore = 1, ExeDomain = _.ExeDomain,
      Constraints = "$mask = $mask_wb" in {
    def mr : AVX5128I<opc, MRMDestMem,
                      (outs MaskRC:$mask_wb),
                      (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                      OpcodeStr # _.Suffix #
                        "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                      []>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteStore]>;
  }
}
9761
// Scatter variants whose instantiations all carry VEX_W. For every vector
// length there is a "D" form (opcode dopc, mnemonic suffix "d") and a "Q" form
// (opcode qopc, mnemonic suffix "q"); the two differ in the memory operand
// class they use. The 128/256-bit forms additionally require VLX.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  // 512-bit forms.
  defm NAME#D#SUFF#Z : avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>,
                       EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z : avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>,
                       EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    // 256-bit forms.
    defm NAME#D#SUFF#Z256 : avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                           vx256xmem>,
                            EVEX_V256, VEX_W;
    defm NAME#Q#SUFF#Z256 : avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                           vy256xmem>,
                            EVEX_V256, VEX_W;
    // 128-bit forms.
    defm NAME#D#SUFF#Z128 : avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                           vx128xmem>,
                            EVEX_V128, VEX_W;
    defm NAME#Q#SUFF#Z128 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vx128xmem>,
                            EVEX_V128, VEX_W;
  }
}
9779
// Scatter variants instantiated without VEX_W. The "D" forms use the vector
// info matching their encoded vector length. The "Q" forms use a narrower
// info than the encoded length (info256 under EVEX_V512, info128 under
// EVEX_V256), and the 128-bit "Q" form overrides the mask class with VK2WM.
// The 128/256-bit forms additionally require VLX.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  // 512-bit encodings.
  defm NAME#D#SUFF#Z : avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                      vz512mem>,
                       EVEX_V512;
  defm NAME#Q#SUFF#Z : avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                      vz256mem>,
                       EVEX_V512;

  let Predicates = [HasVLX] in {
    // 256-bit encodings.
    defm NAME#D#SUFF#Z256 : avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                           vy256xmem>,
                            EVEX_V256;
    defm NAME#Q#SUFF#Z256 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vy128xmem>,
                            EVEX_V256;
    // 128-bit encodings.
    defm NAME#D#SUFF#Z128 : avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                           vx128xmem>,
                            EVEX_V128;
    defm NAME#Q#SUFF#Z128 : avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vx64xmem, VK2WM>,
                            EVEX_V128;
  }
}
9797
// Floating-point scatters: PD via the q_pd family, PS via the d_ps family.
defm VSCATTER :
    avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
    avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: Q via the q_pd family, D via the d_ps family.
defm VPSCATTER :
    avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
    avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9803
// Gather/scatter prefetch. Defines one "m" form that takes a mask and a
// memory operand and has no outputs and no selection pattern. It is gated on
// HasPFI and flagged as both mayLoad and mayStore.
//   opc / F   - opcode byte and instruction format.
//   OpcodeStr - full mnemonic.
//   KRC       - mask register class.
//   memop     - memory operand class.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr, RegisterClass KRC,
                                          X86MemOperand memop> {
  let mayStore = 1, mayLoad = 1, Predicates = [HasPFI] in {
    def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                     OpcodeStr # "\t{$src {${mask}}|{${mask}}, $src}", []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
  }
}
9812
// Gather prefetches, format MRM1m (PF0).
defm VGATHERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dps", VK16WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qps", VK8WM, vz256mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dpd", VK8WM, vy512xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qpd", VK8WM, vz512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetches, format MRM2m (PF1).
defm VGATHERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dps", VK16WM, vz512mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qps", VK8WM, vz256mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dpd", VK8WM, vy512xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qpd", VK8WM, vz512mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches, format MRM5m (PF0).
defm VSCATTERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dps", VK16WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qps", VK8WM, vz256mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dpd", VK8WM, vy512xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qpd", VK8WM, vz512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches, format MRM6m (PF1).
defm VSCATTERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dps", VK16WM, vz512mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qps", VK8WM, vz256mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dpd", VK8WM, vy512xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qpd", VK8WM, vz512mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9860
// Mask-to-vector conversion for one vector width: the "rr" form is selected
// for a sign-extension of the mask register into the vector type Vec.VT.
// Vec.Suffix is appended to the mnemonic.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      OpcodeStr # Vec.Suffix # "\t{$src, $dst|$dst, $src}",
                      [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
           EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}
9867
// Instantiates cvt_by_vec_width for all three vector lengths of VTInfo.
// The 512-bit form requires prd; the 256/128-bit forms require prd and VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
}
9878
// Byte/word variants are gated on BWI, dword/qword variants on DQI. The
// word and qword variants add VEX_W to the shared opcode.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2",
                                      HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2",
                                      HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2",
                                      HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2",
                                      HasDQI>, VEX_W;
9883
// Vector-to-mask conversion for one vector width: the "rr" form is selected
// for X86pcmpgtm with an all-zeros first operand and the source vector as the
// second operand.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.KRC:$dst,
                            (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
           EVEX, Sched<[WriteMove]>;
}
9890
// Use the 512-bit version to implement the 128/256-bit cases when VLX is not
// available: the narrow source is inserted into an undefined ExtendInfo-wide
// register, the 512-bit "Zrr" instruction is applied, and the result is
// copied to the mask register class of the narrow type.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {
  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT
              (COPY_TO_REGCLASS
                (!cast<Instruction>(Name#"Zrr")
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx)),
                _.KRC))>;
}
9903
// Vector-to-mask conversion for all three vector lengths of VTInfo.
//   prd         : 512-bit instruction.
//   prd + VLX   : native 256/128-bit instructions.
//   prd + NoVLX : 256/128-bit cases are lowered through the 512-bit
//                 instruction instead.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo,
                                         Predicate prd> {
  let Predicates = [prd] in {
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }

  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512,
                                                    VTInfo.info128, NAME>;
  }
}
9921
// Byte/word variants are gated on BWI, dword/qword variants on DQI. The
// word and qword variants add VEX_W to the shared opcode.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                  avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                  avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                  avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                  avx512vl_i64_info, HasDQI>, VEX_W;
9930
// Sign-extending a mask register to a vector of byte or word elements when
// DQI is available but BWI is not. This cannot be handled in lowering because
// a target-independent DAG combine likes to combine sext and trunc. The mask
// is first expanded to dword elements (VPMOVM2D) and then narrowed.
let Predicates = [HasDQI, NoBWI] in {
  // v16i1 -> v16i8 through v16i32.
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  // v16i1 -> v16i16 through v16i32.
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

// v8i1 -> v8i16 through v8i32; uses the 256-bit instructions, so VLX is
// required as well.
let Predicates = [HasDQI, NoBWI, HasVLX] in
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
          (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9945
9946//===----------------------------------------------------------------------===//
9947// AVX-512 - COMPRESS and EXPAND
9948//
9949
// Instruction definitions for a compress operation at one vector width:
//   rr  - maskable register-destination forms (no pattern here: null_frag).
//   mr  - unmasked store form.
//   mrk - masked store form.
// None of these carry selection patterns of their own.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr,
                                        X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, Sched<[sched]>;

  // These flags are set explicitly on `mr` only; `mrk` below is outside
  // this let.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX5128I<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      []>,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;
  }

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                     []>,
            EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded]>;
}
9971
// Selection patterns for the compress instructions named Name#_.ZSuffix:
//   - a masked compressing store selects the "mrk" form;
//   - X86compress with a register pass-through selects "rrk";
//   - X86compress with an all-zeros pass-through selects "rrkz".
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Compressing store.
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
               addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  // Merge-masked register form.
  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  // Zero-masked register form.
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
               _.KRCWM:$mask, _.RC:$src)>;
}
9984
// Instantiates the compress instructions and their selection patterns for all
// three vector lengths of VTInfo. The 512-bit form requires Pred (HasAVX512
// by default); the 256/128-bit forms require Pred and VLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr,
                                          sched>,
             compress_by_vec_width_lowering<VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr,
                                             sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr,
                                             sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
10000
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// All four variants are marked NotMemoryFoldable; the 64-bit element variants
// add VEX_W.
defm VPCOMPRESSD : compress_by_elt_width<0x8B, "vpcompressd",
                     WriteVarShuffle256, avx512vl_i32_info>,
                   EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width<0x8B, "vpcompressq",
                     WriteVarShuffle256, avx512vl_i64_info>,
                   EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width<0x8A, "vcompressps",
                     WriteVarShuffle256, avx512vl_f32_info>,
                   EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width<0x8A, "vcompresspd",
                     WriteVarShuffle256, avx512vl_f64_info>,
                   EVEX, VEX_W, NotMemoryFoldable;
10010
// expand
// Instruction definitions for an expand operation at one vector width: a
// maskable register-source form (rr) and a maskable memory-source form (rm).
// Neither carries a selection pattern here (null_frag).
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
                            (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
10025
// Selection patterns for the expand instructions named Name#_.ZSuffix.
//   Expanding load, undef pass-through     -> rmkz
//   Expanding load, all-zeros pass-through -> rmkz
//   Expanding load, register pass-through  -> rmk
//   X86expand, register pass-through       -> rrk
//   X86expand, all-zeros pass-through      -> rrkz
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Expanding loads.
  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
               _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
               _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
               _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  // Register-source expands.
  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
               _.KRCWM:$mask, _.RC:$src)>;
}
10048
// Instantiates the expand instructions and their selection patterns for all
// three vector lengths of VTInfo. The 512-bit form requires Pred (HasAVX512
// by default); the 256/128-bit forms require Pred and VLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
             expand_by_vec_width_lowering<VTInfo.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>,
                EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>,
                EVEX_V128;
  }
}
10064
// FIXME: Is there a better scheduler class for VPEXPAND?
// The 64-bit element variants add VEX_W to the shared opcode.
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd", WriteVarShuffle256,
                                     avx512vl_i32_info>,
                 EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq", WriteVarShuffle256,
                                     avx512vl_i64_info>,
                 EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps", WriteVarShuffle256,
                                     avx512vl_f32_info>,
                 EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd", WriteVarShuffle256,
                                     avx512vl_f64_info>,
                 EVEX, VEX_W;
10074
// Unary packed FP operation with an 8-bit immediate:
//   reg_vec1 = op(reg_vec, imm)
//              op(mem_vec, imm)
//              op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
// OpNode is used for the unmasked pattern and MaskOpNode for the masked ones
// (AVX512_maskable_split). Every form reads MXCSR and may raise an FP
// exception.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let mayRaiseFPException = 1, Uses = [MXCSR], ExeDomain = _.ExeDomain in {
    // Register source.
    defm rri : AVX512_maskable_split<
                 opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, i32u8imm:$src2),
                 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
               Sched<[sched]>;

    // Full-vector memory source.
    defm rmi : AVX512_maskable_split<
                 opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.MemOp:$src1, i32u8imm:$src2),
                 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                         (i32 timm:$src2)),
                 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                             (i32 timm:$src2))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Broadcast scalar memory source (EVEX_B).
    defm rmbi : AVX512_maskable_split<
                  opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                  OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr#", $src2",
                  (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                          (i32 timm:$src2)),
                  (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                              (i32 timm:$src2))>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10109
// Unary packed FP operation with an 8-bit immediate and {sae}:
//   reg_vec1 = op(reg_vec2, imm), {sae}
// Register-source form only; it reads MXCSR and is encoded with EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let Uses = [MXCSR], ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable<
                  opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, i32u8imm:$src2),
                  OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                  "$src1, {sae}, $src2",
                  (OpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                EVEX_B, Sched<[sched]>;
  }
}
10123
// Instantiates the unary FP-with-immediate instructions for all three vector
// lengths. Only the 512-bit instantiation also gets the {sae} form (using
// OpNodeSAE). The 128/256-bit forms require VLX in addition to prd.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
            Predicate prd>{
  let Predicates = [prd] in
  defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
           avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                          sched.ZMM, _.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           MaskOpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode,
                                           MaskOpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}
10141
// Binary packed FP operation with an 8-bit immediate:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT. Every form reads MXCSR
// and may raise an FP exception.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let mayRaiseFPException = 1, Uses = [MXCSR], ExeDomain = _.ExeDomain in {
    // Register second source.
    defm rri : AVX512_maskable<
                 opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (i32 timm:$src3))>,
               Sched<[sched]>;

    // Full-vector memory second source.
    defm rmi : AVX512_maskable<
                 opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT (bitconvert (_.LdFrag addr:$src2))),
                         (i32 timm:$src3))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    // Broadcast scalar memory second source (EVEX_B).
    defm rmbi : AVX512_maskable<
                  opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i32 timm:$src3))>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10173
// Three-operand operation with an 8-bit immediate, where the source and
// destination vector types may differ (SrcInfo vs. DestInfo):
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
// Masking and the execution domain follow DestInfo.
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
    // Register second source.
    defm rri : AVX512_maskable<
                 opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                      (SrcInfo.VT SrcInfo.RC:$src2),
                                      (i8 timm:$src3)))>,
               Sched<[sched]>;

    // Memory second source.
    defm rmi : AVX512_maskable<
                 opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                      (SrcInfo.VT (bitconvert
                                                    (SrcInfo.LdFrag addr:$src2))),
                                      (i8 timm:$src3)))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
10197
10198//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10199//                               op(reg_vec2,mem_vec,imm)
10200//                               op(reg_vec2,broadcast(eltVt),imm)
// Extends avx512_3Op_rm_imm8 (instantiated with the same type info for both
// destination and source) with an rmbi form whose second source is a scalar
// memory operand matched through _.BroadcastLdFrag and flagged with EVEX_B.
10201multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10202                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10203  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10204
10205  let ExeDomain = _.ExeDomain in
  // Broadcast form: the asm strings append _.BroadcastStr to the memory
  // operand.
10206  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10207                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10208                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10209                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10210                    (OpNode (_.VT _.RC:$src1),
10211                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10212                            (i8 timm:$src3))>, EVEX_B,
10213                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10214}
10215
10216//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10217//                                      op(reg_vec2,mem_scalar,imm)
// Scalar floating-point operation with two sources and an immediate, defined
// through AVX512_maskable_scalar in register (rri) and memory (rmi) forms.
// Both forms are marked as reading MXCSR and as able to raise FP exceptions.
// The immediate is an i32u8imm operand matched as (i32 timm).
10218multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10219                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10220  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10221  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10222                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10223                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10224                      (OpNode (_.VT _.RC:$src1),
10225                              (_.VT _.RC:$src2),
10226                              (i32 timm:$src3))>,
10227                      Sched<[sched]>;
  // Memory form: the second source is an _.IntScalarMemOp operand matched
  // with _.ScalarIntMemFrags.
10228  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10229                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10230                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10231                    (OpNode (_.VT _.RC:$src1),
10232                            (_.ScalarIntMemFrags addr:$src2),
10233                            (i32 timm:$src3))>,
10234                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10235  }
10236}
10237
10238//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Packed register-only form (rrib) whose asm strings carry "{sae}" and which
// is flagged with EVEX_B. OpNode is expected to be the SAE variant of the
// operation (callers in this file pass a separate OpNodeSAE).
// The definition is marked as using MXCSR; mayRaiseFPException is not set
// here.
10239multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10240                                    SDNode OpNode, X86FoldableSchedWrite sched,
10241                                    X86VectorVTInfo _> {
10242  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10243  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10244                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10245                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10246                      "$src1, $src2, {sae}, $src3",
10247                      (OpNode (_.VT _.RC:$src1),
10248                              (_.VT _.RC:$src2),
10249                              (i32 timm:$src3))>,
10250                      EVEX_B, Sched<[sched]>;
10251}
10252
10253//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of the packed {sae} form: a single register-only
// definition (NAME#rrib) built with AVX512_maskable_scalar, flagged EVEX_B and
// marked as using MXCSR. mayRaiseFPException is not set here.
10254multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10255                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10256  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10257  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10258                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10259                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10260                      "$src1, $src2, {sae}, $src3",
10261                      (OpNode (_.VT _.RC:$src1),
10262                              (_.VT _.RC:$src2),
10263                              (i32 timm:$src3))>,
10264                      EVEX_B, Sched<[sched]>;
10265}
10266
// Instantiates the packed two-source FP immediate forms for all three vector
// widths of the given AVX512VLVectorVTInfo.
//   Z          - 512-bit, gated by prd; also receives the {sae} form built
//                from OpNodeSAE.
//   Z128, Z256 - gated by prd and HasVLX; no {sae} form is defined for them.
10267multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10268            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10269            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10270  let Predicates = [prd] in {
10271    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10272                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10273                                  EVEX_V512;
10274
10275  }
10276  let Predicates = [prd, HasVLX] in {
10277    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10278                                  EVEX_V128;
10279    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10280                                  EVEX_V256;
10281  }
10282}
10283
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, distinct
// destination/source type info) at 512, 128 and 256 bits. Every variant is
// tagged AVX512AIi8Base and EVEX_4V.
//   Pred - feature predicate for all widths (defaults to HasBWI); the 128- and
//          256-bit variants additionally require HasVLX.
10284multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10285                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10286                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10287  let Predicates = [Pred] in {
10288    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10289                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10290  }
10291  let Predicates = [Pred, HasVLX] in {
10292    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10293                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10294    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10295                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10296  }
10297}
10298
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) at 512,
// 128 and 256 bits.
//   Pred - feature predicate for all widths (defaults to HasAVX512); the 128-
//          and 256-bit variants additionally require HasVLX.
// Unlike avx512_common_3Op_rm_imm8, no encoding base classes are attached
// here beyond the vector-length class.
10299multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10300                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10301                                  Predicate Pred = HasAVX512> {
10302  let Predicates = [Pred] in {
10303    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10304                                EVEX_V512;
10305  }
10306  let Predicates = [Pred, HasVLX] in {
10307    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10308                                EVEX_V128;
10309    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10310                                EVEX_V256;
10311  }
10312}
10313
// Scalar wrapper: defines a single "Z" family combining the regular scalar
// immediate forms (OpNode) with the {sae} register form (OpNodeSAE). Both use
// sched.XMM and are gated by prd.
10314multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10315                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10316                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10317  let Predicates = [prd] in {
10318     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10319              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10320  }
10321}
10322
// Defines the PS (f32) and PD (f64) flavours of a unary packed FP operation
// with an immediate by delegating to avx512_common_unary_fp_sae_packed_imm
// (defined outside this section).
//   opcPs / opcPd - opcode bytes for the PS and PD flavours respectively.
//   OpNode, MaskOpNode, OpNodeSAE - forwarded unchanged to the delegate.
// PS uses EVEX_CD8<32, CD8VF>; PD uses EVEX_CD8<64, CD8VF> and VEX_W.
10323multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10324                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10325                    SDNode MaskOpNode, SDNode OpNodeSAE,
10326                    X86SchedWriteWidths sched, Predicate prd>{
10327  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10328                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10329                            EVEX_CD8<32, CD8VF>;
10330  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10331                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10332                            EVEX_CD8<64, CD8VF>, VEX_W;
10333}
10334
// Unary packed FP instructions with an immediate, each defined in PS and PD
// flavours. All three use SchedWriteFRnd and the AVX512AIi8Base/EVEX classes.
// VREDUCE: opcode 0x56 for both PS and PD; requires HasDQI.
10335defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10336                              X86VReduce, X86VReduce, X86VReduceSAE,
10337                              SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
// VRNDSCALE: opcode 0x08 (PS) / 0x09 (PD); requires HasAVX512. The OpNode
// argument is X86any_VRndScale while the MaskOpNode argument is X86VRndScale.
10338defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10339                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10340                              SchedWriteFRnd, HasAVX512>,
10341                              AVX512AIi8Base, EVEX;
// VGETMANT: opcode 0x26 for both PS and PD; requires HasAVX512.
10342defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10343                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
10344                              SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
10345
// Packed VRANGE: opcode 0x50, requires HasDQI, scheduled as SchedWriteFAdd.
// The PD flavour adds VEX_W and uses a 64-bit CD8 element size.
10346defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10347                                                0x50, X86VRange, X86VRangeSAE,
10348                                                SchedWriteFAdd, HasDQI>,
10349      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10350defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10351                                                0x50, X86VRange, X86VRangeSAE,
10352                                                SchedWriteFAdd, HasDQI>,
10353      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10354
// Scalar VRANGE: opcode 0x51, requires HasDQI. All scalar definitions below
// are tagged VEX_LIG and use the CD8VT1 tuple form; SD flavours add VEX_W.
10355defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10356      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10357      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10358defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10359      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10360      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10361
// Scalar VREDUCE: opcode 0x57, requires HasDQI, scheduled as SchedWriteFRnd.
10362defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10363      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10364      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10365defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10366      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10367      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10368
// Scalar VGETMANT: opcode 0x27, requires HasAVX512, scheduled as
// SchedWriteFRnd.
10369defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10370      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10371      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10372defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10373      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10374      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10375
// Defines the rri, rmi and rmbi forms of a 128-bit-lane shuffle selected from
// X86Shuf128. In every pattern the X86Shuf128 node is typed as CastInfo.VT
// and the result is bitconverted to _.VT.
//   _            - type info of the instruction's operands and result.
//   CastInfo     - type info in which the X86Shuf128 node is matched.
//   EVEX2VEXOvrd - prefix for the EVEX2VEXOverride names; "rr" / "rm" is
//                  appended for the register / memory forms. The broadcast
//                  form carries no override.
10376multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10377                                          X86FoldableSchedWrite sched,
10378                                          X86VectorVTInfo _,
10379                                          X86VectorVTInfo CastInfo,
10380                                          string EVEX2VEXOvrd> {
10381  let ExeDomain = _.ExeDomain in {
10382  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10383                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10384                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10385                  (_.VT (bitconvert
10386                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10387                                                  (i8 timm:$src3)))))>,
10388                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Memory form: the load is matched with CastInfo.LdFrag.
10389  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10390                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10391                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10392                (_.VT
10393                 (bitconvert
10394                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
10395                                           (CastInfo.LdFrag addr:$src2),
10396                                           (i8 timm:$src3)))))>,
10397                Sched<[sched.Folded, sched.ReadAfterFold]>,
10398                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Broadcast form: the load is matched with _.BroadcastLdFrag (not
  // CastInfo) and the definition is flagged EVEX_B.
10399  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10400                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10401                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10402                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10403                    (_.VT
10404                     (bitconvert
10405                      (CastInfo.VT
10406                       (X86Shuf128 _.RC:$src1,
10407                                   (_.BroadcastLdFrag addr:$src2),
10408                                   (i8 timm:$src3)))))>, EVEX_B,
10409                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10410  }
10411}
10412
// Instantiates avx512_shuff_packed_128_common for the 512-bit (Z) and 256-bit
// (Z256) widths; no 128-bit variant is defined.
//   Z    - requires HasAVX512; passes an empty EVEX2VEX override prefix.
//   Z256 - requires HasAVX512 and HasVLX; passes EVEX2VEXOvrd through.
10413multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10414                                   AVX512VLVectorVTInfo _,
10415                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10416                                   string EVEX2VEXOvrd>{
10417  let Predicates = [HasAVX512] in
10418  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10419                                          _.info512, CastInfo.info512, "">, EVEX_V512;
10420
10421  let Predicates = [HasAVX512, HasVLX] in
10422  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10423                                             _.info256, CastInfo.info256,
10424                                             EVEX2VEXOvrd>, EVEX_V256;
10425}
10426
// 128-bit-lane shuffles. The FP flavours use opcode 0x23 with the
// "VPERM2F128" override prefix; the integer flavours use opcode 0x43 with
// "VPERM2I128". The 32x4 flavours match X86Shuf128 in the 64-bit-element
// type info (CastInfo) while operating on 32-bit-element operands; the 64x2
// flavours add VEX_W.
10427defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10428      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10429defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10430      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10431defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10432      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10433defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10434      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10435
// Register-source lowering of X86SubVBroadcast from a 128-bit value to a
// 512-bit vector. Each pattern inserts the XMM source into an IMPLICIT_DEF
// 512-bit register (at sub_xmm), passes that value as both shuffle sources,
// and uses an immediate of 0.
10436let Predicates = [HasAVX512] in {
10437// Provide fallback in case the load node that is used in the broadcast
10438// patterns above is used by additional users, which prevents the pattern
10439// selection.
// 64-bit element types use the 64x2 shuffles.
10440def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10441          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10442                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10443                          0)>;
10444def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10445          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10446                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10447                          0)>;
10448
// 32-bit element types use the 32x4 shuffles.
10449def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10450          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10451                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10452                          0)>;
10453def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10454          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10455                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10456                          0)>;
10457
// 16-bit and 8-bit element types also map onto VSHUFI32X4.
10458def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10459          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10460                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10461                          0)>;
10462
10463def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10464          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10465                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10466                          0)>;
10467}
10468
// Defines the rri, rmi and rmbi forms of an instruction selected from
// X86VAlign with an (i8 timm) immediate. The register and memory forms carry
// EVEX2VEXOverride entries naming VPALIGNRrri / VPALIGNRrmi; the broadcast
// form carries none.
10469multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10470                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10471  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10472  // instantiation of this class.
10473  let ExeDomain = _.ExeDomain in {
10474  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10475                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10476                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10477                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10478                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10479  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10480                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10481                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10482                (_.VT (X86VAlign _.RC:$src1,
10483                                 (bitconvert (_.LdFrag addr:$src2)),
10484                                 (i8 timm:$src3)))>,
10485                Sched<[sched.Folded, sched.ReadAfterFold]>,
10486                EVEX2VEXOverride<"VPALIGNRrmi">;
10487
  // Broadcast form: flagged EVEX_B. Unlike rri/rmi, the pattern root is not
  // wrapped in an explicit (_.VT ...) cast.
10488  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10489                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10490                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10491                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
10492                   (X86VAlign _.RC:$src1,
10493                              (_.VT (_.BroadcastLdFrag addr:$src2)),
10494                              (i8 timm:$src3))>, EVEX_B,
10495                   Sched<[sched.Folded, sched.ReadAfterFold]>;
10496  }
10497}
10498
// Instantiates avx512_valign with opcode 0x03 at 512, 128 and 256 bits, each
// tagged AVX512AIi8Base and EVEX_4V.
//   Z          - requires HasAVX512.
//   Z128, Z256 - require HasAVX512 and HasVLX. For Z256 the EVEX2VEXOverride
//                field is reset to unset (?) around the instantiation.
10499multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10500                                AVX512VLVectorVTInfo _> {
10501  let Predicates = [HasAVX512] in {
10502    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10503                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
10504  }
10505  let Predicates = [HasAVX512, HasVLX] in {
10506    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10507                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
10508    // We can't really override the 256-bit version so change it back to unset.
10509    let EVEX2VEXOverride = ? in
10510    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10511                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
10512  }
10513}
10514
// VALIGND / VALIGNQ: 32-bit and 64-bit element flavours built from
// avx512_valign_common, both scheduled as SchedWriteShuffle; the Q flavour
// adds VEX_W.
10515defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10516                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10517defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10518                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10519                                   VEX_W;
10520
// VPALIGNR: opcode 0x0F, selected from X86PAlignr on i8 vectors for both
// destination and source. The Pred argument is left at the default of
// avx512_common_3Op_rm_imm8.
10521defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10522                                         SchedWriteShuffle, avx512vl_i8_info,
10523                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10524
10525// Fragments to help convert valignq into masked valignd. Or valignq/valignd
10526// into vpalignr.
// Multiplies the matched target immediate by 2 and returns it through
// getI8Imm. Used below when a valignq node is selected as a VALIGND
// instruction.
10527def ValignqImm32XForm : SDNodeXForm<timm, [{
10528  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10529}]>;
// Multiplies the matched target immediate by 8 and returns it through
// getI8Imm. Used below when a 64-bit-element X86VAlign is selected as a
// VPALIGNR instruction.
10530def ValignqImm8XForm : SDNodeXForm<timm, [{
10531  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10532}]>;
// Multiplies the matched target immediate by 4 and returns it through
// getI8Imm. Used below when a 32-bit-element X86VAlign is selected as a
// VPALIGNR instruction.
10533def ValigndImm8XForm : SDNodeXForm<timm, [{
10534  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10535}]>;
10536
// Selection patterns for a masked select (vselect_mask) in type To whose
// value operand is a bitconvert of OpNode computed in type From. Each pattern
// selects the instruction named OpcodeStr plus a suffix and rewrites the
// immediate with ImmXForm.
//   OpcodeStr - instruction name prefix; "rrik", "rrikz", "rmik" or "rmikz"
//               is appended.
//   From      - type info in which OpNode is matched.
//   To        - type info of the mask, passthrough and selected instruction.
//   ImmXForm  - SDNodeXForm applied to $src3 in the output.
10537multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10538                                        X86VectorVTInfo From, X86VectorVTInfo To,
10539                                        SDNodeXForm ImmXForm> {
  // Register sources, passthrough taken from $src0.
10540  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10541                                 (bitconvert
10542                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10543                                                   timm:$src3))),
10544                                 To.RC:$src0)),
10545            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10546                                                  To.RC:$src1, To.RC:$src2,
10547                                                  (ImmXForm timm:$src3))>;
10548
  // Register sources, false operand is To.ImmAllZerosV.
10549  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10550                                 (bitconvert
10551                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10552                                                   timm:$src3))),
10553                                 To.ImmAllZerosV)),
10554            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10555                                                   To.RC:$src1, To.RC:$src2,
10556                                                   (ImmXForm timm:$src3))>;
10557
  // Second source loaded with From.LdFrag, passthrough taken from $src0.
10558  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10559                                 (bitconvert
10560                                  (From.VT (OpNode From.RC:$src1,
10561                                                   (From.LdFrag addr:$src2),
10562                                           timm:$src3))),
10563                                 To.RC:$src0)),
10564            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10565                                                  To.RC:$src1, addr:$src2,
10566                                                  (ImmXForm timm:$src3))>;
10567
  // Second source loaded with From.LdFrag, false operand is To.ImmAllZerosV.
10568  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10569                                 (bitconvert
10570                                  (From.VT (OpNode From.RC:$src1,
10571                                                   (From.LdFrag addr:$src2),
10572                                           timm:$src3))),
10573                                 To.ImmAllZerosV)),
10574            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10575                                                   To.RC:$src1, addr:$src2,
10576                                                   (ImmXForm timm:$src3))>;
10577}
10578
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// broadcast load in type To (To.BroadcastLdFrag) bitconverted into the From
// operation. Adds an unmasked pattern selecting OpcodeStr#"rmbi" and two
// masked patterns selecting "rmbik" and "rmbikz".
10579multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10580                                           X86VectorVTInfo From,
10581                                           X86VectorVTInfo To,
10582                                           SDNodeXForm ImmXForm> :
10583      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked: the pattern root is the From-typed operation itself.
10584  def : Pat<(From.VT (OpNode From.RC:$src1,
10585                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10586                             timm:$src3)),
10587            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10588                                                  (ImmXForm timm:$src3))>;
10589
  // Masked, passthrough taken from $src0.
10590  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10591                                 (bitconvert
10592                                  (From.VT (OpNode From.RC:$src1,
10593                                           (bitconvert
10594                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
10595                                           timm:$src3))),
10596                                 To.RC:$src0)),
10597            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10598                                                   To.RC:$src1, addr:$src2,
10599                                                   (ImmXForm timm:$src3))>;
10600
  // Masked, false operand is To.ImmAllZerosV.
10601  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10602                                 (bitconvert
10603                                  (From.VT (OpNode From.RC:$src1,
10604                                           (bitconvert
10605                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
10606                                           timm:$src3))),
10607                                 To.ImmAllZerosV)),
10608            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10609                                                    To.RC:$src1, addr:$src2,
10610                                                    (ImmXForm timm:$src3))>;
10611}
10612
// 512-bit: select a v8i64 X86VAlign as VALIGNDZ* (v16i32), rewriting the
// immediate with ValignqImm32XForm.
10613let Predicates = [HasAVX512] in {
10614  // For 512-bit we lower to the widest element type we can. So we only need
10615  // to handle converting valignq to valignd.
10616  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10617                                         v16i32_info, ValignqImm32XForm>;
10618}
10619
// 128-bit and 256-bit: select 64-bit-element X86VAlign as VALIGNDZ128* /
// VALIGNDZ256* (32-bit elements), rewriting the immediate with
// ValignqImm32XForm. Only HasVLX is listed as a predicate here.
10620let Predicates = [HasVLX] in {
10621  // For 128-bit we lower to the widest element type we can. So we only need
10622  // to handle converting valignq to valignd.
10623  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10624                                         v4i32x_info, ValignqImm32XForm>;
10625  // For 256-bit we lower to the widest element type we can. So we only need
10626  // to handle converting valignq to valignd.
10627  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10628                                         v8i32x_info, ValignqImm32XForm>;
10629}
10630
// Select masked 128-bit X86VAlign (64-bit or 32-bit elements) as
// VPALIGNRZ128* on v16i8, scaling the immediate by 8 or 4 respectively.
// These use avx512_vpalign_mask_lowering, so no broadcast patterns are added.
10631let Predicates = [HasVLX, HasBWI] in {
10632  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // NOTE(review): the comment above mentions 256 bit, but only VPALIGNRZ128
  // instantiations appear in this block - confirm whether that is intended.
10633  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10634                                      v16i8x_info, ValignqImm8XForm>;
10635  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10636                                      v16i8x_info, ValigndImm8XForm>;
10637}
10638
// VDBPSADBW: opcode 0x42, selected from X86dbpsadbw with i16 destination
// vectors and i8 source vectors. Marked NotEVEX2VEXConvertible. The Pred
// argument is left at the default of avx512_common_3Op_rm_imm8.
10639defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10640                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10641                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10642
// Single-source (unary) vector operation in register (rr) and full-vector
// memory (rm) forms, both tagged EVEX and AVX5128IBase.
//   OpNode - SDNode applied to the single source in the selection pattern.
//   sched  - scheduling class; the memory form uses sched.Folded only.
10643multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10644                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10645  let ExeDomain = _.ExeDomain in {
10646  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10647                    (ins _.RC:$src1), OpcodeStr,
10648                    "$src1", "$src1",
10649                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10650                    Sched<[sched]>;
10651
  // Memory form: additionally sets EVEX_CD8 from the element size of _.
10652  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10653                  (ins _.MemOp:$src1), OpcodeStr,
10654                  "$src1", "$src1",
10655                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10656            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10657            Sched<[sched.Folded]>;
10658  }
10659}
10660
// Extends avx512_unary_rm with a broadcast-memory form (rmb): the source is a
// scalar memory operand matched through _.BroadcastLdFrag and the definition
// is flagged EVEX_B. Note that rmb is not wrapped in a `let ExeDomain` in
// this multiclass.
10661multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10662                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10663           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10664  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10665                  (ins _.ScalarMemOp:$src1), OpcodeStr,
10666                  "${src1}"#_.BroadcastStr,
10667                  "${src1}"#_.BroadcastStr,
10668                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10669             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10670             Sched<[sched.Folded]>;
10671}
10672
// Instantiates avx512_unary_rm (register and memory forms, no broadcast) at
// 512, 256 and 128 bits.
//   prd - feature predicate for all widths; the 256- and 128-bit variants
//         additionally require HasVLX.
10673multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10674                              X86SchedWriteWidths sched,
10675                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10676  let Predicates = [prd] in
10677    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10678                             EVEX_V512;
10679
10680  let Predicates = [prd, HasVLX] in {
10681    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10682                              EVEX_V256;
10683    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10684                              EVEX_V128;
10685  }
10686}
10687
// Instantiates avx512_unary_rmb (register, memory and broadcast forms) at
// 512, 256 and 128 bits.
//   prd - feature predicate for all widths; the 256- and 128-bit variants
//         additionally require HasVLX.
10688multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10689                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10690                               Predicate prd> {
10691  let Predicates = [prd] in
10692    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10693                              EVEX_V512;
10694
10695  let Predicates = [prd, HasVLX] in {
10696    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10697                                 EVEX_V256;
10698    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10699                                 EVEX_V128;
10700  }
10701}
10702
// Defines the Q (i64, VEX_W) and D (i32) flavours of a unary operation, each
// with broadcast support via avx512_unary_rmb_vl. The mnemonic is OpcodeStr
// with "q" or "d" appended.
//   opc_d / opc_q - opcode bytes for the D and Q flavours.
10703multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10704                                 SDNode OpNode, X86SchedWriteWidths sched,
10705                                 Predicate prd> {
10706  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10707                               avx512vl_i64_info, prd>, VEX_W;
10708  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10709                               avx512vl_i32_info, prd>;
10710}
10711
// Defines the W (i16) and B (i8) flavours of a unary operation via
// avx512_unary_rm_vl, i.e. without broadcast forms. Both are tagged VEX_WIG.
// The mnemonic is OpcodeStr with "w" or "b" appended.
//   opc_b / opc_w - opcode bytes for the B and W flavours.
10712multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10713                                 SDNode OpNode, X86SchedWriteWidths sched,
10714                                 Predicate prd> {
10715  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10716                              avx512vl_i16_info, prd>, VEX_WIG;
10717  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10718                              avx512vl_i8_info, prd>, VEX_WIG;
10719}
10720
// Defines all four element-size flavours of a unary operation under the
// caller's NAME: D/Q gated by HasAVX512 and B/W gated by HasBWI.
// Opcode parameters are given in b, w, d, q order.
10721multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10722                                  bits<8> opc_d, bits<8> opc_q,
10723                                  string OpcodeStr, SDNode OpNode,
10724                                  X86SchedWriteWidths sched> {
10725  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10726                                    HasAVX512>,
10727              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10728                                    HasBWI>;
10729}
10730
// VPABS{B,W,D,Q}: selected from the generic `abs` node with opcodes
// 0x1C (b), 0x1D (w), 0x1E (d) and 0x1F (q); scheduled as SchedWriteVecALU.
10731defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10732                                    SchedWriteVecALU>;
10733
10734// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Without VLX, select v4i64 / v2i64 `abs` by inserting the source into an
// IMPLICIT_DEF v8i64 register, running the 512-bit VPABSQZrr, and extracting
// the matching sub-register (sub_ymm / sub_xmm) from the result.
10735let Predicates = [HasAVX512, NoVLX] in {
10736  def : Pat<(v4i64 (abs VR256X:$src)),
10737            (EXTRACT_SUBREG
10738                (VPABSQZrr
10739                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10740             sub_ymm)>;
10741  def : Pat<(v2i64 (abs VR128X:$src)),
10742            (EXTRACT_SUBREG
10743                (VPABSQZrr
10744                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10745             sub_xmm)>;
10746}
10747
10748// Use 512bit version to implement 128/256 bit.
// Emits two anonymous patterns, active when prd holds and VLX is absent
// (NoVLX). For the 256-bit and 128-bit types of the given VT-info group, the
// source is inserted into an undefined 512-bit value, the register-register
// 512-bit instruction InstrStr#"Zrr" is applied, and the matching subregister
// of the result is extracted.
//   InstrStr - instruction name prefix; "Zrr" is appended for the lookup.
//   OpNode   - the unary node being matched.
//   _        - AVX512VLVectorVTInfo providing info128/info256/info512.
//   prd      - feature predicate required in addition to NoVLX.
10749multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10750                                 AVX512VLVectorVTInfo _, Predicate prd> {
10751  let Predicates = [prd, NoVLX] in {
10752    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10753              (EXTRACT_SUBREG
10754                (!cast<Instruction>(InstrStr # "Zrr")
10755                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10756                                 _.info256.RC:$src1,
10757                                 _.info256.SubRegIdx)),
10758              _.info256.SubRegIdx)>;
10759
10760    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10761              (EXTRACT_SUBREG
10762                (!cast<Instruction>(InstrStr # "Zrr")
10763                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10764                                 _.info128.RC:$src1,
10765                                 _.info128.SubRegIdx)),
10766              _.info128.SubRegIdx)>;
10767  }
10768}
10769
// VPLZCNTD/Q: both element sizes use opcode 0x44 (the Q form additionally
// gets VEX_W inside avx512_unary_rm_vl_dq). Selected from ctlz, requires
// HasCDI.
10770defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10771                                        SchedWriteVecIMul, HasCDI>;
10772
10773// FIXME: Is there a better scheduler class for VPCONFLICT?
// VPCONFLICTD/Q: opcode 0xC4 for both element sizes, selected from
// X86Conflict, requires HasCDI.
10774defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10775                                        SchedWriteVecALU, HasCDI>;
10776
10777// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10778defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10779defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10780
10781//===---------------------------------------------------------------------===//
10782// Counts number of ones - VPOPCNTD and VPOPCNTQ
10783//===---------------------------------------------------------------------===//
10784
10785// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// VPOPCNTD/Q: opcode 0x55 for both element sizes, selected from ctpop,
// requires HasVPOPCNTDQ.
10786defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10787                                     SchedWriteVecALU, HasVPOPCNTDQ>;
10788
// Without VLX, 128/256-bit ctpop is implemented with the 512-bit instruction.
10789defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10790defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10791
10792//===---------------------------------------------------------------------===//
10793// Replicate Single FP - MOVSHDUP and MOVSLDUP
10794//===---------------------------------------------------------------------===//
10795
// Wraps avx512_unary_rm_vl for the f32 vector types (avx512vl_f32_info) with
// predicate HasAVX512 and the XS prefix class.
10796multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10797                            X86SchedWriteWidths sched> {
10798  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10799                                      avx512vl_f32_info, HasAVX512>, XS;
10800}
10801
// VMOVSHDUP is opcode 0x16 (X86Movshdup); VMOVSLDUP is opcode 0x12
// (X86Movsldup). Both are scheduled as SchedWriteFShuffle.
10802defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10803                                  SchedWriteFShuffle>;
10804defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10805                                  SchedWriteFShuffle>;
10806
10807//===----------------------------------------------------------------------===//
10808// AVX-512 - MOVDDUP
10809//===----------------------------------------------------------------------===//
10810
// 128-bit MOVDDUP forms. Unlike the wider forms, these are matched as a
// broadcast rather than as X86Movddup:
//   rr - register source, pattern X86VBroadcast of the source vector.
//   rm - scalar memory source (_.ScalarMemOp), pattern _.BroadcastLdFrag;
//        uses EVEX_CD8<_.EltSize, CD8VH> and the folded scheduling class.
// Both are built with AVX512_maskable in the VT-info's execution domain.
10811multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10812                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10813  let ExeDomain = _.ExeDomain in {
10814  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10815                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
10816                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10817                   Sched<[sched]>;
10818  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10819                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10820                 (_.VT (_.BroadcastLdFrag addr:$src))>,
10821                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10822                 Sched<[sched.Folded]>;
10823  }
10824}
10825
// Instantiates MOVDDUP for all three vector widths of VTInfo:
//   Z    - 512-bit, avx512_unary_rm matching OpNode.
//   Z256 - 256-bit, avx512_unary_rm matching OpNode (HasAVX512 + HasVLX).
//   Z128 - 128-bit, avx512_movddup_128, which matches a broadcast instead of
//          OpNode (HasAVX512 + HasVLX).
// OpNode is now actually used for the 512/256-bit forms instead of a
// hard-coded X86Movddup; the existing instantiation passes X86Movddup, so the
// generated records are unchanged.
10826multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10827                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10828  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
10829                           VTInfo.info512>, EVEX_V512;
10830
10831  let Predicates = [HasAVX512, HasVLX] in {
10832    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
10833                                VTInfo.info256>, EVEX_V256;
10834    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
10835                                   VTInfo.info128>, EVEX_V128;
10836  }
10837}
10838
// Wraps avx512_movddup_common for the f64 vector types (avx512vl_f64_info)
// and adds the XD prefix class and VEX_W.
10839multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10840                          X86SchedWriteWidths sched> {
10841  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10842                                        avx512vl_f64_info>, XD, VEX_W;
10843}
10844
// VMOVDDUP: opcode 0x12, scheduled as SchedWriteFShuffle.
10845defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10846
// With VLX, select a v2f64 broadcast of a scalar f64 as VMOVDDUPZ128. The
// scalar is first copied from FR64X into VR128X. Three forms are covered:
// unmasked (rr), merge-masked with passthru $src0 (rrk) and zero-masked
// (rrkz).
10847let Predicates = [HasVLX] in {
10848def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10849          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10850
10851def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10852                        (v2f64 VR128X:$src0)),
10853          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10854                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10855def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10856                        immAllZerosV),
10857          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10858}
10859
10860//===----------------------------------------------------------------------===//
10861// AVX-512 - Unpack Instructions
10862//===----------------------------------------------------------------------===//
10863
// Floating-point unpacks. They are defined through the FP binop multiclass,
// so the register uses are cleared and mayRaiseFPException is reset to 0 for
// them here.
10864let Uses = []<Register>, mayRaiseFPException = 0 in {
10865defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
10866                                 SchedWriteFShuffleSizes, 0, 1>;
10867defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
10868                                 SchedWriteFShuffleSizes>;
10869}
10870
// Integer unpacks. The byte and word forms require HasBWI.
10871defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10872                                       SchedWriteShuffle, HasBWI>;
10873defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10874                                       SchedWriteShuffle, HasBWI>;
10875defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10876                                       SchedWriteShuffle, HasBWI>;
10877defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10878                                       SchedWriteShuffle, HasBWI>;
10879
// The dword and qword forms only require HasAVX512.
10880defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10881                                       SchedWriteShuffle, HasAVX512>;
10882defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10883                                       SchedWriteShuffle, HasAVX512>;
10884defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10885                                        SchedWriteShuffle, HasAVX512>;
10886defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10887                                        SchedWriteShuffle, HasAVX512>;
10888
10889//===----------------------------------------------------------------------===//
10890// AVX-512 - Extract & Insert Integer Instructions
10891//===----------------------------------------------------------------------===//
10892
// Memory-destination form ("mr") shared by the byte and word element
// extracts: the element selected by immediate $src2 is extracted with OpNode,
// truncated to the element type and stored to $dst.
10893multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10894                                                            X86VectorVTInfo _> {
10895  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10896              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10897              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10898              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10899                       addr:$dst)]>,
10900              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10901}
10902
// Byte element extract (opcode 0x14, TAPD), requires HasBWI:
//   rr - result in a GR32orGR64 register, matched from X86pextrb.
//   mr - store form, from avx512_extract_elt_bw_m.
10903multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10904  let Predicates = [HasBWI] in {
10905    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10906                  (ins _.RC:$src1, u8imm:$src2),
10907                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10908                  [(set GR32orGR64:$dst,
10909                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10910                  EVEX, TAPD, Sched<[WriteVecExtract]>;
10911
10912    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10913  }
10914}
10915
// Word element extract, requires HasBWI:
//   rr     - opcode 0xC5, MRMSrcReg, PD; matched from X86pextrw.
//   rr_REV - alternate register encoding (opcode 0x15, MRMDestReg, TAPD). It
//            has no selection pattern, is isCodeGenOnly but still
//            disassembled (ForceDisassemble), and is tied to rr via
//            FoldGenData.
//   mr     - store form (opcode 0x15, TAPD), from avx512_extract_elt_bw_m.
10916multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10917  let Predicates = [HasBWI] in {
10918    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10919                  (ins _.RC:$src1, u8imm:$src2),
10920                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10921                  [(set GR32orGR64:$dst,
10922                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10923                  EVEX, PD, Sched<[WriteVecExtract]>;
10924
10925    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10926    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10927                   (ins _.RC:$src1, u8imm:$src2),
10928                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10929                   EVEX, TAPD, FoldGenData<NAME#rr>,
10930                   Sched<[WriteVecExtract]>;
10931
10932    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10933  }
10934}
10935
// Dword/qword element extract (opcode 0x16, TAPD), requires HasDQI. GRC is
// the general-purpose register class receiving the result.
//   rr - extractelt into a GRC register.
//   mr - extractelt stored directly to memory (no truncation needed, unlike
//        the byte/word forms).
10936multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10937                                                            RegisterClass GRC> {
10938  let Predicates = [HasDQI] in {
10939    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10940                  (ins _.RC:$src1, u8imm:$src2),
10941                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10942                  [(set GRC:$dst,
10943                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10944                  EVEX, TAPD, Sched<[WriteVecExtract]>;
10945
10946    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10947                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10948                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10949                [(store (extractelt (_.VT _.RC:$src1),
10950                                    imm:$src2),addr:$dst)]>,
10951                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10952                Sched<[WriteVecExtractSt]>;
10953  }
10954}
10955
// EVEX element extracts from a 128-bit vector. The byte/word forms are
// VEX_WIG; the qword form is distinguished from the dword form by VEX_W and
// the GR64 result class.
10956defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10957defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10958defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10959defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10960
// Memory-source form ("rm") shared by all element inserts: the scalar loaded
// through LdFrag from $src2 is inserted into $src1 at the index given by
// immediate $src3, using OpNode.
10961multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10962                                            X86VectorVTInfo _, PatFrag LdFrag> {
10963  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10964      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
10965      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10966      [(set _.RC:$dst,
10967          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10968      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
10969}
10970
// Byte/word element insert, requires HasBWI:
//   rr - scalar comes from a GR32orGR64 register.
//   rm - scalar comes from memory via LdFrag (avx512_insert_elt_m).
// The opcode-map prefix is not set here; it is supplied at the defm site.
10971multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10972                                            X86VectorVTInfo _, PatFrag LdFrag> {
10973  let Predicates = [HasBWI] in {
10974    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10975        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10976        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10977        [(set _.RC:$dst,
10978            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10979        Sched<[WriteVecInsert]>;
10980
10981    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10982  }
10983}
10984
// Dword/qword element insert (TAPD), requires HasDQI. GRC is the
// general-purpose register class supplying the scalar.
//   rr - insertelt of a GRC register.
//   rm - insertelt of a scalar loaded with _.ScalarLdFrag
//        (avx512_insert_elt_m).
10985multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10986                                         X86VectorVTInfo _, RegisterClass GRC> {
10987  let Predicates = [HasDQI] in {
10988    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10989        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10990        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10991        [(set _.RC:$dst,
10992            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10993        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10994
10995    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10996                                    _.ScalarLdFrag>, TAPD;
10997  }
10998}
10999
// EVEX element inserts into a 128-bit vector. The byte and word forms load
// their memory operand with an extending load (extloadi8 / extloadi16) and
// carry their own prefix class (TAPD / PD). The dword and qword forms share
// opcode 0x22, with VEX_W selecting the qword variant.
11000defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11001                                     extloadi8>, TAPD, VEX_WIG;
11002defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11003                                     extloadi16>, PD, VEX_WIG;
11004defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11005defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11006
11007//===----------------------------------------------------------------------===//
11008// VSHUFPS - VSHUFPD Operations
11009//===----------------------------------------------------------------------===//
11010
// VSHUFPS/VSHUFPD: a three-operand imm8 operation (opcode 0xC6, X86Shufp)
// over the floating-point vector types VTInfo_FP.
// NOTE(review): VTInfo_I is accepted but not referenced in this body.
11011multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11012                        AVX512VLVectorVTInfo VTInfo_FP>{
11013  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11014                                    SchedWriteFShuffle>,
11015                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11016                                    AVX512AIi8Base, EVEX_4V;
11017}
11018
// PS selects the single-precision form; PD plus VEX_W the double-precision
// form.
11019defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11020defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11021
11022//===----------------------------------------------------------------------===//
11023// AVX-512 - Byte shift Left/Right
11024//===----------------------------------------------------------------------===//
11025
// Whole-register byte shift by an immediate, for one vector type:
//   ri - register source; format MRMr.
//   mi - full-vector memory source (loaded with _.LdFrag and bitconverted to
//        _.VT); format MRMm, folded-load scheduling.
// The shift amount is matched as a target immediate (i8 timm).
11026multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11027                               Format MRMm, string OpcodeStr,
11028                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11029  def ri : AVX512<opc, MRMr,
11030             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11031             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11032             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11033             Sched<[sched]>;
11034  def mi : AVX512<opc, MRMm,
11035           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11036           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11037           [(set _.RC:$dst,(_.VT (OpNode
11038                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11039                                 (i8 timm:$src2))))]>,
11040           Sched<[sched.Folded, sched.ReadAfterFold]>;
11041}
11042
// Instantiates avx512_shift_packed for the three byte-vector widths:
// Z (v64i8, needs prd), Z256 (v32i8) and Z128 (v16i8), the latter two needing
// prd and HasVLX.
11043multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11044                                   Format MRMm, string OpcodeStr,
11045                                   X86SchedWriteWidths sched, Predicate prd>{
11046  let Predicates = [prd] in
11047    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11048                                 sched.ZMM, v64i8_info>, EVEX_V512;
11049  let Predicates = [prd, HasVLX] in {
11050    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11051                                    sched.YMM, v32i8x_info>, EVEX_V256;
11052    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11053                                    sched.XMM, v16i8x_info>, EVEX_V128;
11054  }
11055}
// VPSLLDQ and VPSRLDQ share opcode 0x73 and are told apart by the ModRM
// format: MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right shift.
// Both require HasBWI.
11056defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11057                                       SchedWriteShuffle, HasBWI>,
11058                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11059defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11060                                       SchedWriteShuffle, HasBWI>,
11061                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11062
// Two-source operation whose result type (_dst) differs from its source type
// (_src):
//   rr - both sources in registers; marked commutable.
//   rm - second source is a full-vector load (_src.LdFrag, bitconverted to
//        _src.VT); folded-load scheduling.
11063multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11064                                string OpcodeStr, X86FoldableSchedWrite sched,
11065                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11066  let isCommutable = 1 in
11067  def rr : AVX512BI<opc, MRMSrcReg,
11068             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11069             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11070             [(set _dst.RC:$dst,(_dst.VT
11071                                (OpNode (_src.VT _src.RC:$src1),
11072                                        (_src.VT _src.RC:$src2))))]>,
11073             Sched<[sched]>;
11074  def rm : AVX512BI<opc, MRMSrcMem,
11075           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11076           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11077           [(set _dst.RC:$dst,(_dst.VT
11078                              (OpNode (_src.VT _src.RC:$src1),
11079                              (_src.VT (bitconvert
11080                                        (_src.LdFrag addr:$src2))))))]>,
11081           Sched<[sched.Folded, sched.ReadAfterFold]>;
11082}
11083
// Instantiates avx512_psadbw_packed for the three widths, pairing a byte
// source vector with an i64 result vector of the same total size:
// v64i8 -> v8i64 (Z), v32i8 -> v4i64 (Z256), v16i8 -> v2i64 (Z128).
// Z needs prd; Z256/Z128 need prd and HasVLX.
11084multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11085                                    string OpcodeStr, X86SchedWriteWidths sched,
11086                                    Predicate prd> {
11087  let Predicates = [prd] in
11088    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11089                                  v8i64_info, v64i8_info>, EVEX_V512;
11090  let Predicates = [prd, HasVLX] in {
11091    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11092                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11093    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11094                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11095  }
11096}
11097
// VPSADBW: opcode 0xf6, matched from X86psadbw, requires HasBWI.
11098defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11099                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11100
11101// Transforms to swizzle an immediate to enable better matching when
11102// memory operand isn't in the right place.
11103def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11104  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  // Bit (Op0 << 2 | Op1 << 1 | Op2) of the immediate is the result for that
  // combination of operand bits, so exchanging operands 0 and 2 exchanges
  // index bits 2 and 0. Indices 0, 2, 5 and 7 are unaffected (mask 0xa5).
11105  uint8_t Imm = N->getZExtValue();
11106  // Swap bits 1/4 and 3/6.
11107  uint8_t NewImm = Imm & 0xa5;
11108  if (Imm & 0x02) NewImm |= 0x10;
11109  if (Imm & 0x10) NewImm |= 0x02;
11110  if (Imm & 0x08) NewImm |= 0x40;
11111  if (Imm & 0x40) NewImm |= 0x08;
11112  return getI8Imm(NewImm, SDLoc(N));
11113}]>;
11114def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11115  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // Bit (Op0 << 2 | Op1 << 1 | Op2) of the immediate is the result for that
  // combination of operand bits, so exchanging operands 0 and 1 exchanges
  // index bits 2 and 1. Indices 0, 1, 6 and 7 are unaffected (mask 0xc3).
11116  uint8_t Imm = N->getZExtValue();
11117  // Swap bits 2/4 and 3/5.
11118  uint8_t NewImm = Imm & 0xc3;
11119  if (Imm & 0x04) NewImm |= 0x10;
11120  if (Imm & 0x10) NewImm |= 0x04;
11121  if (Imm & 0x08) NewImm |= 0x20;
11122  if (Imm & 0x20) NewImm |= 0x08;
11123  return getI8Imm(NewImm, SDLoc(N));
11124}]>;
11125def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11126  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  // Bit (Op0 << 2 | Op1 << 1 | Op2) of the immediate is the result for that
  // combination of operand bits, so exchanging operands 1 and 2 exchanges
  // index bits 1 and 0. Indices 0, 3, 4 and 7 are unaffected (mask 0x99).
11127  uint8_t Imm = N->getZExtValue();
11128  // Swap bits 1/2 and 5/6.
11129  uint8_t NewImm = Imm & 0x99;
11130  if (Imm & 0x02) NewImm |= 0x04;
11131  if (Imm & 0x04) NewImm |= 0x02;
11132  if (Imm & 0x20) NewImm |= 0x40;
11133  if (Imm & 0x40) NewImm |= 0x20;
11134  return getI8Imm(NewImm, SDLoc(N));
11135}]>;
11136def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11137  // Convert the immediate of a VPTERNLOG node whose operands appear in
  // (src2, src3, src1) order into the immediate for the instruction's
  // (src1, src2, src3) order.
  // Bit (Op0 << 2 | Op1 << 1 | Op2) of the immediate is the result for that
  // combination of operand bits. Node operand 2 (src1) becomes operand 0,
  // operand 0 (src2) becomes operand 1 and operand 1 (src3) becomes operand 2,
  // so index (a,b,c) moves to (c,a,b). Example: 0xF0 (result = node operand 0,
  // i.e. src2) must become 0xCC (result = instruction operand 1).
  // The previous body applied the inverse permutation, which selects the
  // wrong operand for any immediate that is not symmetric under the rotation.
11138  uint8_t Imm = N->getZExtValue();
11139  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11140  uint8_t NewImm = Imm & 0x81;
11141  if (Imm & 0x02) NewImm |= 0x10;
11142  if (Imm & 0x04) NewImm |= 0x02;
11143  if (Imm & 0x08) NewImm |= 0x20;
11144  if (Imm & 0x10) NewImm |= 0x04;
11145  if (Imm & 0x20) NewImm |= 0x40;
11146  if (Imm & 0x40) NewImm |= 0x08;
11147  return getI8Imm(NewImm, SDLoc(N));
11148}]>;
11149def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11150  // Convert the immediate of a VPTERNLOG node whose operands appear in
  // (src3, src1, src2) order into the immediate for the instruction's
  // (src1, src2, src3) order.
  // Node operand 0 (src3) becomes operand 2, operand 1 (src1) becomes
  // operand 0 and operand 2 (src2) becomes operand 1, so index (a,b,c) moves
  // to (b,c,a). Example: 0xF0 (result = node operand 0, i.e. src3) must
  // become 0xAA (result = instruction operand 2).
11151  uint8_t Imm = N->getZExtValue();
11152  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11153  uint8_t NewImm = Imm & 0x81;
11154  if (Imm & 0x02) NewImm |= 0x04;
11155  if (Imm & 0x04) NewImm |= 0x10;
11156  if (Imm & 0x08) NewImm |= 0x40;
11157  if (Imm & 0x10) NewImm |= 0x02;
11158  if (Imm & 0x20) NewImm |= 0x08;
11159  if (Imm & 0x40) NewImm |= 0x20;
11160  return getI8Imm(NewImm, SDLoc(N));
11161}]>;
11162
// Defines the VPTERNLOG forms for one vector type:
//   rri  - all-register form.
//   rmi  - $src3 is a full-vector load.
//   rmbi - $src3 is a broadcast load (EVEX_B).
// $src1 is tied to $dst. The anonymous Pat records that follow select the
// same instructions when the node's operands appear in a different order
// (tied passthru, load or broadcast not in the last position); each one
// rewrites the immediate with the VPTERNLOG*_imm8 transform named in its
// output. In those patterns $src1 always names the value that ends up in the
// tied register and $src3 the memory operand.
//   Name - instruction-name prefix; combined with _.ZSuffix and the form
//          suffix for the !cast<Instruction> lookups.
11163multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11164                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11165                          string Name>{
11166  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11167  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11168                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11169                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11170                      (OpNode (_.VT _.RC:$src1),
11171                              (_.VT _.RC:$src2),
11172                              (_.VT _.RC:$src3),
11173                              (i8 timm:$src4)), 1, 1>,
11174                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11175  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11176                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11177                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11178                    (OpNode (_.VT _.RC:$src1),
11179                            (_.VT _.RC:$src2),
11180                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11181                            (i8 timm:$src4)), 1, 0>,
11182                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11183                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11184  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11185                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11186                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11187                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11188                    (OpNode (_.VT _.RC:$src1),
11189                            (_.VT _.RC:$src2),
11190                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11191                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11192                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11193                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11194  }// Constraints = "$src1 = $dst"
11195
11196  // Additional patterns for matching passthru operand in other positions.
11197  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11198                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11199                   _.RC:$src1)),
11200            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11201             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11202  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11203                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11204                   _.RC:$src1)),
11205            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11206             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11207
11208  // Additional patterns for matching loads in other positions.
11209  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11210                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11211            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11212                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11213  def : Pat<(_.VT (OpNode _.RC:$src1,
11214                          (bitconvert (_.LdFrag addr:$src3)),
11215                          _.RC:$src2, (i8 timm:$src4))),
11216            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11217                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11218
11219  // Additional patterns for matching zero masking with loads in other
11220  // positions.
11221  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11222                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11223                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11224                   _.ImmAllZerosV)),
11225            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11226             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11227  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11228                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11229                    _.RC:$src2, (i8 timm:$src4)),
11230                   _.ImmAllZerosV)),
11231            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11232             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11233
11234  // Additional patterns for matching masked loads with different
11235  // operand orders.
11236  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11237                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11238                    _.RC:$src2, (i8 timm:$src4)),
11239                   _.RC:$src1)),
11240            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11241             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11242  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11243                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11244                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11245                   _.RC:$src1)),
11246            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11247             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11248  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11249                   (OpNode _.RC:$src2, _.RC:$src1,
11250                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11251                   _.RC:$src1)),
11252            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11253             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11254  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11255                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11256                    _.RC:$src1, (i8 timm:$src4)),
11257                   _.RC:$src1)),
11258            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11259             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11260  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11261                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11262                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11263                   _.RC:$src1)),
11264            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11265             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11266
11267  // Additional patterns for matching broadcasts in other positions.
11268  def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
11269                          _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11270            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11271                                   addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11272  def : Pat<(_.VT (OpNode _.RC:$src1,
11273                          (_.BroadcastLdFrag addr:$src3),
11274                          _.RC:$src2, (i8 timm:$src4))),
11275            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11276                                   addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11277
11278  // Additional patterns for matching zero masking with broadcasts in other
11279  // positions.
11280  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11281                   (OpNode (_.BroadcastLdFrag addr:$src3),
11282                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11283                   _.ImmAllZerosV)),
11284            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11285             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11286             (VPTERNLOG321_imm8 timm:$src4))>;
11287  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11288                   (OpNode _.RC:$src1,
11289                    (_.BroadcastLdFrag addr:$src3),
11290                    _.RC:$src2, (i8 timm:$src4)),
11291                   _.ImmAllZerosV)),
11292            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11293             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11294             (VPTERNLOG132_imm8 timm:$src4))>;
11295
11296  // Additional patterns for matching masked broadcasts with different
11297  // operand orders.
11298  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11299                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11300                    _.RC:$src2, (i8 timm:$src4)),
11301                   _.RC:$src1)),
11302            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11303             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11304  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11305                   (OpNode (_.BroadcastLdFrag addr:$src3),
11306                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11307                   _.RC:$src1)),
11308            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11309             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11310  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11311                   (OpNode _.RC:$src2, _.RC:$src1,
11312                    (_.BroadcastLdFrag addr:$src3),
11313                    (i8 timm:$src4)), _.RC:$src1)),
11314            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11315             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11316  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11317                   (OpNode _.RC:$src2,
11318                    (_.BroadcastLdFrag addr:$src3),
11319                    _.RC:$src1, (i8 timm:$src4)),
11320                   _.RC:$src1)),
11321            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11322             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11323  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11324                   (OpNode (_.BroadcastLdFrag addr:$src3),
11325                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11326                   _.RC:$src1)),
11327            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11328             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11329}
11330
// Expands one VPTERNLOG flavour into its three vector-length variants.
//   Z    - built from _.info512, guarded by HasAVX512 only.
//   Z128 - built from _.info128, guarded by HasAVX512 and HasVLX.
//   Z256 - built from _.info256, guarded by HasAVX512 and HasVLX.
// Every variant uses opcode 0x25 and the X86vpternlog node, and takes the
// scheduling class of the matching width from `sched`.  NAME is forwarded so
// the patterns inside avx512_ternlog can look up the generated instructions.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog,
                            sched.ZMM, _.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog,
                               sched.XMM, _.info128, NAME>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog,
                               sched.YMM, _.info256, NAME>,
                EVEX_V256;
  }
}
11343
// Doubleword flavour of VPTERNLOG, instantiated over the i32 vector types.
defm VPTERNLOGD :
  avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, avx512vl_i32_info>;

// Quadword flavour of VPTERNLOG, instantiated over the i64 vector types and
// additionally tagged with VEX_W.
defm VPTERNLOGQ :
  avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, avx512vl_i64_info>,
  VEX_W;
11348
// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
//
// The unmasked VPTERNLOGD/VPTERNLOGQ instructions are also used for result
// types that have no instruction of their own (vXi8/vXi16), and for broadcast
// loads whose element width differs from the element width of the result
// type.  The patterns come in two fixed shapes, each captured by one helper
// multiclass below.  The helpers are instantiated per result type in the
// order: register/load patterns, 32-bit broadcast, 64-bit broadcast, so that
// the relative order of equally complex patterns stays stable.

// Register and full-vector-load patterns for result type VT in register
// class RC:
//   - all three sources in registers                    -> RRI
//   - load as third source                              -> RMI, imm unchanged
//   - load as first source  (operands matched as 3,2,1) -> RMI, imm passed
//                                                through VPTERNLOG321_imm8
//   - load as second source (operands matched as 1,3,2) -> RMI, imm passed
//                                                through VPTERNLOG132_imm8
// LdFrag is the load fragment matching VT.
multiclass avx512_ternlog_altvt_rm_pats<ValueType VT, RegisterClass RC,
                                        SDPatternOperator LdFrag,
                                        Instruction RRI, Instruction RMI> {
  def : Pat<(VT (X86vpternlog RC:$src1, RC:$src2, RC:$src3,
                              (i8 timm:$src4))),
            (RRI RC:$src1, RC:$src2, RC:$src3, timm:$src4)>;
  def : Pat<(VT (X86vpternlog RC:$src1, RC:$src2, (LdFrag addr:$src3),
                              (i8 timm:$src4))),
            (RMI RC:$src1, RC:$src2, addr:$src3, timm:$src4)>;
  def : Pat<(VT (X86vpternlog (LdFrag addr:$src3), RC:$src2, RC:$src1,
                              (i8 timm:$src4))),
            (RMI RC:$src1, RC:$src2, addr:$src3,
                 (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(VT (X86vpternlog RC:$src1, (LdFrag addr:$src3), RC:$src2,
                              (i8 timm:$src4))),
            (RMI RC:$src1, RC:$src2, addr:$src3,
                 (VPTERNLOG132_imm8 timm:$src4))>;
}

// Broadcast-load patterns for result type VT in register class RC, where the
// broadcast is produced as BcastVT by BcastLd and bitcast to VT.  The
// broadcast may appear as the third, first or second source; RMBI always
// receives it as the memory operand, and the immediate is passed through
// VPTERNLOG321_imm8 / VPTERNLOG132_imm8 for the latter two operand orders.
multiclass avx512_ternlog_altvt_bcast_pats<ValueType VT, RegisterClass RC,
                                           ValueType BcastVT,
                                           SDPatternOperator BcastLd,
                                           Instruction RMBI> {
  def : Pat<(VT (X86vpternlog RC:$src1, RC:$src2,
                              (bitconvert (BcastVT (BcastLd addr:$src3))),
                              (i8 timm:$src4))),
            (RMBI RC:$src1, RC:$src2, addr:$src3, timm:$src4)>;
  def : Pat<(VT (X86vpternlog (bitconvert (BcastVT (BcastLd addr:$src3))),
                              RC:$src2, RC:$src1, (i8 timm:$src4))),
            (RMBI RC:$src1, RC:$src2, addr:$src3,
                  (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(VT (X86vpternlog RC:$src1,
                              (bitconvert (BcastVT (BcastLd addr:$src3))),
                              RC:$src2, (i8 timm:$src4))),
            (RMBI RC:$src1, RC:$src2, addr:$src3,
                  (VPTERNLOG132_imm8 timm:$src4))>;
}

let Predicates = [HasVLX] in {
  // 128-bit: v16i8.
  defm : avx512_ternlog_altvt_rm_pats<v16i8, VR128X, loadv16i8,
                                      VPTERNLOGQZ128rri, VPTERNLOGQZ128rmi>;
  defm : avx512_ternlog_altvt_bcast_pats<v16i8, VR128X, v4i32,
                                         X86VBroadcastld32,
                                         VPTERNLOGDZ128rmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v16i8, VR128X, v2i64,
                                         X86VBroadcastld64,
                                         VPTERNLOGQZ128rmbi>;

  // 128-bit: v8i16.
  defm : avx512_ternlog_altvt_rm_pats<v8i16, VR128X, loadv8i16,
                                      VPTERNLOGQZ128rri, VPTERNLOGQZ128rmi>;
  defm : avx512_ternlog_altvt_bcast_pats<v8i16, VR128X, v4i32,
                                         X86VBroadcastld32,
                                         VPTERNLOGDZ128rmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v8i16, VR128X, v2i64,
                                         X86VBroadcastld64,
                                         VPTERNLOGQZ128rmbi>;

  // 128-bit: v4i32/v2i64 only need the broadcast of the other element width.
  defm : avx512_ternlog_altvt_bcast_pats<v4i32, VR128X, v2i64,
                                         X86VBroadcastld64,
                                         VPTERNLOGQZ128rmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v2i64, VR128X, v4i32,
                                         X86VBroadcastld32,
                                         VPTERNLOGDZ128rmbi>;

  // 256-bit: v32i8.
  defm : avx512_ternlog_altvt_rm_pats<v32i8, VR256X, loadv32i8,
                                      VPTERNLOGQZ256rri, VPTERNLOGQZ256rmi>;
  defm : avx512_ternlog_altvt_bcast_pats<v32i8, VR256X, v8i32,
                                         X86VBroadcastld32,
                                         VPTERNLOGDZ256rmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v32i8, VR256X, v4i64,
                                         X86VBroadcastld64,
                                         VPTERNLOGQZ256rmbi>;

  // 256-bit: v16i16.
  defm : avx512_ternlog_altvt_rm_pats<v16i16, VR256X, loadv16i16,
                                      VPTERNLOGQZ256rri, VPTERNLOGQZ256rmi>;
  defm : avx512_ternlog_altvt_bcast_pats<v16i16, VR256X, v8i32,
                                         X86VBroadcastld32,
                                         VPTERNLOGDZ256rmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v16i16, VR256X, v4i64,
                                         X86VBroadcastld64,
                                         VPTERNLOGQZ256rmbi>;

  // 256-bit: v8i32/v4i64 only need the broadcast of the other element width.
  defm : avx512_ternlog_altvt_bcast_pats<v8i32, VR256X, v4i64,
                                         X86VBroadcastld64,
                                         VPTERNLOGQZ256rmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v4i64, VR256X, v8i32,
                                         X86VBroadcastld32,
                                         VPTERNLOGDZ256rmbi>;
}

let Predicates = [HasAVX512] in {
  // 512-bit: v64i8.
  defm : avx512_ternlog_altvt_rm_pats<v64i8, VR512, loadv64i8,
                                      VPTERNLOGQZrri, VPTERNLOGQZrmi>;
  defm : avx512_ternlog_altvt_bcast_pats<v64i8, VR512, v16i32,
                                         X86VBroadcastld32, VPTERNLOGDZrmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v64i8, VR512, v8i64,
                                         X86VBroadcastld64, VPTERNLOGQZrmbi>;

  // 512-bit: v32i16.  The 32-bit broadcast patterns are instantiated exactly
  // once; a second identical copy could never be selected.
  defm : avx512_ternlog_altvt_rm_pats<v32i16, VR512, loadv32i16,
                                      VPTERNLOGQZrri, VPTERNLOGQZrmi>;
  defm : avx512_ternlog_altvt_bcast_pats<v32i16, VR512, v16i32,
                                         X86VBroadcastld32, VPTERNLOGDZrmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v32i16, VR512, v8i64,
                                         X86VBroadcastld64, VPTERNLOGQZrmbi>;

  // 512-bit: v16i32/v8i64 only need the broadcast of the other element width.
  defm : avx512_ternlog_altvt_bcast_pats<v16i32, VR512, v8i64,
                                         X86VBroadcastld64, VPTERNLOGQZrmbi>;
  defm : avx512_ternlog_altvt_bcast_pats<v8i64, VR512, v16i32,
                                         X86VBroadcastld32, VPTERNLOGDZrmbi>;
}
11740
11741// Patterns to implement vnot using vpternlog instead of creating all ones
11742// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11743// so that the result is only dependent on src0. But we use the same source
11744// for all operands to prevent a false dependency.
11745// TODO: We should maybe have a more generalized algorithm for folding to
11746// vpternlog.
let Predicates = [HasAVX512] in {
  // 512-bit vnot: one pattern per integer element type.  Every type selects
  // the same VPTERNLOGQZrri, with $src supplied as all three sources and an
  // immediate of 15.
  foreach VT = [v64i8, v32i16, v16i32, v8i64] in {
    def : Pat<(xor VR512:$src, (VT immAllOnesV)),
              (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  }
}
11757
let Predicates = [HasAVX512, NoVLX] in {
  // Under NoVLX the 128/256-bit vnot is selected as the 512-bit
  // VPTERNLOGQZrri: $src is inserted into an IMPLICIT_DEF v8i64 for each of
  // the three sources, and the low subregister of the result is extracted.

  // 128-bit types, widened through sub_xmm.
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in {
    def : Pat<(xor VR128X:$src, (VT immAllOnesV)),
              (EXTRACT_SUBREG
               (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (i8 15)), sub_xmm)>;
  }

  // 256-bit types, widened through sub_ymm.
  foreach VT = [v32i8, v16i16, v8i32, v4i64] in {
    def : Pat<(xor VR256X:$src, (VT immAllOnesV)),
              (EXTRACT_SUBREG
               (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (i8 15)), sub_ymm)>;
  }
}
11817
// Vector NOT (xor with an all-ones vector) for 128- and 256-bit types when
// VLX is available: select the matching-width VPTERNLOGQ directly, feeding
// the same register to all three sources with immediate 15.
// With identical sources only truth-table entries 0 (all bits 0) and 7 (all
// bits 1) can be consulted; 15 (0b00001111) has bit 0 set and bit 7 clear,
// which yields the bitwise complement.
// One pattern is needed per element type because the all-ones constant is
// typed.
11818let Predicates = [HasVLX] in {
  // 128-bit vectors.
11819  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11820            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11821  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11822            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11823  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11824            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11825  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11826            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11827
  // 256-bit vectors.
11828  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11829            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11830  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11831            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11832  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11833            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11834  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11835            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11836}
11837
11838//===----------------------------------------------------------------------===//
11839// AVX-512 - FixupImm
11840//===----------------------------------------------------------------------===//
11841
// Packed VFIXUPIMM forms for one vector width.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   sched     - scheduling class (sched for the register form, sched.Folded
//               plus sched.ReadAfterFold for the memory forms).
//   _         - type info for the destination and the first two sources.
//   TblVT     - type info supplying the value type (and load fragments) of
//               the third source operand.
// Defines rri (register), rmi (full-vector load) and rmbi (broadcast load).
// All forms tie $src1 to $dst, use MXCSR and are marked as possibly raising
// an FP exception.
11842multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11843                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11844                                  X86VectorVTInfo TblVT>{
11845  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11846      Uses = [MXCSR], mayRaiseFPException = 1 in {
    // Register form: the third source is read from a register of class _.RC
    // but typed as TblVT.VT in the selection pattern.
11847    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11848                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11849                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11850                        (X86VFixupimm (_.VT _.RC:$src1),
11851                                      (_.VT _.RC:$src2),
11852                                      (TblVT.VT _.RC:$src3),
11853                                      (i32 timm:$src4))>, Sched<[sched]>;
    // Memory form: the third source is a full-vector load via TblVT.LdFrag.
11854    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11855                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11856                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11857                      (X86VFixupimm (_.VT _.RC:$src1),
11858                                    (_.VT _.RC:$src2),
11859                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11860                                    (i32 timm:$src4))>,
11861                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    // Broadcast form: the third source is a scalar memory operand matched
    // with TblVT.BroadcastLdFrag; encoded with EVEX_B and printed with the
    // broadcast suffix _.BroadcastStr.
11862    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11863                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11864                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11865                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11866                      (X86VFixupimm (_.VT _.RC:$src1),
11867                                    (_.VT _.RC:$src2),
11868                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11869                                    (i32 timm:$src4))>,
11870                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11871  } // Constraints = "$src1 = $dst"
11872}
11873
// Packed VFIXUPIMM forms plus the {sae} register form.
// Inherits rri/rmi/rmbi from avx512_fixupimm_packed (same parameters) and
// adds rrib, which matches X86VFixupimmSAE, is encoded with EVEX_B and
// prints "{sae}" in its assembly string.
// Unlike the inherited forms, the let below does not set
// mayRaiseFPException for rrib; it only records the MXCSR use.
11874multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11875                                      X86FoldableSchedWrite sched,
11876                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
11877  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11878let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11879  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11880                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11881                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11882                      "$src2, $src3, {sae}, $src4",
11883                      (X86VFixupimmSAE (_.VT _.RC:$src1),
11884                                       (_.VT _.RC:$src2),
11885                                       (TblVT.VT _.RC:$src3),
11886                                       (i32 timm:$src4))>,
11887                      EVEX_B, Sched<[sched]>;
11888  }
11889}
11890
// Scalar VFIXUPIMM forms.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   sched     - scheduling class (sched for the register forms, sched.Folded
//               plus sched.ReadAfterFold for the memory form).
//   _         - type info for the destination and the first two sources.
//   _src3VT   - type info supplying the value type, register class and
//               scalar load fragment of the third source operand.
// Defines rri (register), rrib (register with {sae}) and rmi (scalar load).
// All forms tie $src1 to $dst and require HasAVX512.
11891multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11892                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11893                                  X86VectorVTInfo _src3VT> {
11894  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11895      ExeDomain = _.ExeDomain in {
    // Register form. SIMD_EXC is defined elsewhere in the backend
    // (presumably the MXCSR use / FP-exception marking - confirm there).
11896    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11897                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11898                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11899                      (X86VFixupimms (_.VT _.RC:$src1),
11900                                     (_.VT _.RC:$src2),
11901                                     (_src3VT.VT _src3VT.RC:$src3),
11902                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // {sae} register form: records the MXCSR use only (no SIMD_EXC).
    // This is a register-only instruction (MRMSrcReg, every operand is
    // _.RC), so it uses the plain register scheduling class, matching rri
    // above and the packed rrib form, rather than the folded-load classes.
11903    let Uses = [MXCSR] in
11904    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11905                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11906                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11907                      "$src2, $src3, {sae}, $src4",
11908                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
11909                                        (_.VT _.RC:$src2),
11910                                        (_src3VT.VT _src3VT.RC:$src3),
11911                                        (i32 timm:$src4))>,
11912                      EVEX_B, Sched<[sched]>;
    // Memory form: the third source is a scalar load (_src3VT.ScalarLdFrag)
    // placed into a vector with scalar_to_vector.
11913    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11914                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11915                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11916                     (X86VFixupimms (_.VT _.RC:$src1),
11917                                    (_.VT _.RC:$src2),
11918                                    (_src3VT.VT (scalar_to_vector
11919                                              (_src3VT.ScalarLdFrag addr:$src3))),
11920                                    (i32 timm:$src4))>,
11921                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11922  }
11923}
11924
// Instantiates packed VFIXUPIMM (opcode 0x54) at all three vector widths.
//   sched - per-width scheduling classes (.ZMM / .YMM / .XMM are used).
//   _Vec  - type info family for the data operands.
//   _Tbl  - type info family for the third source operand.
// Only the 512-bit variant (Z) gets the {sae} form; the 128/256-bit
// variants additionally require HasVLX.
11925multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11926                                      AVX512VLVectorVTInfo _Vec,
11927                                      AVX512VLVectorVTInfo _Tbl> {
11928  let Predicates = [HasAVX512] in
11929    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11930                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11931                                EVEX_4V, EVEX_V512;
11932  let Predicates = [HasAVX512, HasVLX] in {
11933    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11934                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11935                            EVEX_4V, EVEX_V128;
11936    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11937                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11938                            EVEX_4V, EVEX_V256;
11939  }
11940}
11941
// VFIXUPIMM instantiations.
// Scalar forms use opcode 0x55; their third source is typed as an integer
// vector (v4i32 for SS, v2i64 for SD).
11942defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11943                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11944                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11945defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11946                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11947                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Packed forms pair each FP type family with the integer family of the same
// element width for the third source.
11948defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11949                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11950defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11951                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11952
11953// Patterns used to select SSE scalar fp arithmetic instructions from
11954// either:
11955//
11956// (1) a scalar fp operation followed by a blend
11957//
11958// The effect is that the backend no longer emits unnecessary vector
11959// insert instructions immediately after SSE scalar fp instructions
11960// like addss or mulss.
11961//
11962// For example, given the following code:
11963//   __m128 foo(__m128 A, __m128 B) {
11964//     A[0] += B[0];
11965//     return A;
11966//   }
11967//
11968// Previously we generated:
11969//   addss %xmm0, %xmm1
11970//   movss %xmm1, %xmm0
11971//
11972// We now generate:
11973//   addss %xmm1, %xmm0
11974//
11975// (2) a vector packed single/double fp operation followed by a vector insert
11976//
11977// The effect is that the backend converts the packed fp instruction
11978// followed by a vector insert into a single SSE scalar fp instruction.
11979//
11980// For example, given the following code:
11981//   __m128 foo(__m128 A, __m128 B) {
11982//     __m128 C = A + B;
11983//     return (__m128) {C[0], A[1], A[2], A[3]};
11984//   }
11985//
11986// Previously we generated:
11987//   addps %xmm0, %xmm1
11988//   movss %xmm1, %xmm0
11989//
11990// We now generate:
11991//   addss %xmm1, %xmm0
11992
11993// TODO: Some canonicalization in lowering would simplify the number of
11994// patterns we have to try to match.
// Selection patterns that fold "extract element 0, scalar op, re-insert via
// MoveNode" into the EVEX scalar *_Int instructions.
//   Op        - scalar operation matched in the unmasked patterns.
//   MaskedOp  - scalar operation matched in the masked patterns.
//   OpcPrefix - instruction stem; instructions are looked up by name as
//               "V" # OpcPrefix # "Zrr_Int" / "Zrm_Int" plus the "k" and
//               "kz" variants.
//   MoveNode  - node that merges the low element into the destination
//               vector (instantiated with X86Movss / X86Movsd below).
//   _         - 128-bit vector type info.
//   ZeroFP    - FP zero leaf used to recognise zero-masking.
// Every pattern requires element 0 of the MoveNode's first operand to be the
// first operand of the scalar op.
11995multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
11996                                          string OpcPrefix, SDNode MoveNode,
11997                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
11998  let Predicates = [HasAVX512] in {
11999    // extracted scalar math op with insert via movss
    // (register second operand; the scalar register is copied to VR128X)
12000    def : Pat<(MoveNode
12001               (_.VT VR128X:$dst),
12002               (_.VT (scalar_to_vector
12003                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12004                          _.FRC:$src)))),
12005              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12006               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    // Same, with the second operand loaded from memory.
12007    def : Pat<(MoveNode
12008               (_.VT VR128X:$dst),
12009               (_.VT (scalar_to_vector
12010                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12011                          (_.ScalarLdFrag addr:$src))))),
12012              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12013
12014    // extracted masked scalar math op with insert via movss
    // (merge-masking: $src0 is the value kept when the mask bit is clear and
    // becomes the first operand of the "k" instruction)
12015    def : Pat<(MoveNode (_.VT VR128X:$src1),
12016               (scalar_to_vector
12017                (X86selects_mask VK1WM:$mask,
12018                            (MaskedOp (_.EltVT
12019                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12020                                      _.FRC:$src2),
12021                            _.FRC:$src0))),
12022              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12023               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12024               VK1WM:$mask, _.VT:$src1,
12025               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12026    def : Pat<(MoveNode (_.VT VR128X:$src1),
12027               (scalar_to_vector
12028                (X86selects_mask VK1WM:$mask,
12029                            (MaskedOp (_.EltVT
12030                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12031                                      (_.ScalarLdFrag addr:$src2)),
12032                            _.FRC:$src0))),
12033              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12034               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12035               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12036
12037    // extracted zero-masked scalar math op with insert via movss
    // (the select's false value is ZeroFP, so the "kz" instruction is used
    // and no pass-through operand is needed)
12038    def : Pat<(MoveNode (_.VT VR128X:$src1),
12039               (scalar_to_vector
12040                (X86selects_mask VK1WM:$mask,
12041                            (MaskedOp (_.EltVT
12042                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12043                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12044      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12045          VK1WM:$mask, _.VT:$src1,
12046          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12047    def : Pat<(MoveNode (_.VT VR128X:$src1),
12048               (scalar_to_vector
12049                (X86selects_mask VK1WM:$mask,
12050                            (MaskedOp (_.EltVT
12051                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12052                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12053      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12054  }
12055}
12056
// Instantiate the scalar math folding patterns for add/sub/mul/div.
// The unmasked patterns use the any_* nodes while the masked patterns use
// the plain fadd/fsub/fmul/fdiv nodes.
// Single precision: X86Movss on v4f32.
12057defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12058defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12059defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12060defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12061
// Double precision: X86Movsd on v2f64.
12062defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12063defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12064defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12065defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12066
// Selection pattern for a unary scalar op whose result is merged into a
// vector: Move($dst, scalar_to_vector(OpNode(element 0 of $src))) is
// selected as "V" # OpcPrefix # "Zr_Int" $dst, $src.
// Unlike the binary patterns, $src need not be the same vector as $dst.
//   OpNode    - unary scalar operation.
//   OpcPrefix - instruction stem used to look the instruction up by name.
//   Move      - node that merges the low element into $dst.
//   _         - 128-bit vector type info.
12067multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
12068                                             SDNode Move, X86VectorVTInfo _> {
12069  let Predicates = [HasAVX512] in {
12070    def : Pat<(_.VT (Move _.VT:$dst,
12071                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12072              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12073  }
12074}
12075
// Square root is the only unary op instantiated here (single and double
// precision).
12076defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12077defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12078
12079//===----------------------------------------------------------------------===//
12080// AES instructions
12081//===----------------------------------------------------------------------===//
12082
// EVEX-encoded AES round instructions at 128/256/512 bits.
//   Op        - opcode byte.
//   OpStr     - mnemonic.
//   IntPrefix - name of the 128-bit intrinsic; the 256- and 512-bit
//               intrinsics are looked up as IntPrefix # "_256" / "_512".
// The 128/256-bit forms require HasVLX and HasVAES; the 512-bit form
// requires HasAVX512 and HasVAES.
12083multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12084  let Predicates = [HasVLX, HasVAES] in {
12085    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12086                                  !cast<Intrinsic>(IntPrefix),
12087                                  loadv2i64, 0, VR128X, i128mem>,
12088                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12089    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12090                                  !cast<Intrinsic>(IntPrefix#"_256"),
12091                                  loadv4i64, 0, VR256X, i256mem>,
12092                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12093    }
12094    let Predicates = [HasAVX512, HasVAES] in
12095    defm Z    : AESI_binop_rm_int<Op, OpStr,
12096                                  !cast<Intrinsic>(IntPrefix#"_512"),
12097                                  loadv8i64, 0, VR512, i512mem>,
12098                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
12099}
12100
// The four AES round instructions use consecutive opcodes 0xDC..0xDF and
// reuse the int_x86_aesni_* intrinsic names as the prefix.
12101defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12102defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12103defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12104defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12105
12106//===----------------------------------------------------------------------===//
12107// PCLMUL instructions - Carry less multiplication
12108//===----------------------------------------------------------------------===//
12109
// EVEX-encoded carry-less multiply. The 512-bit form requires HasAVX512 and
// HasVPCLMULQDQ; the 128/256-bit forms require HasVLX and HasVPCLMULQDQ.
// Each width is bound to its own intrinsic.
12110let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12111defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12112                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12113
12114let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12115defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12116                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12117
12118defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12119                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12120                                EVEX_CD8<64, CD8VF>, VEX_WIG;
12121}
12122
12123// Aliases
// (one vpclmulqdq_aliases instantiation per instruction name defined above;
// note these are not wrapped in the feature predicates used above)
12124defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12125defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12126defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12127
12128//===----------------------------------------------------------------------===//
12129// VBMI2
12130//===----------------------------------------------------------------------===//
12131
// Three-source VBMI2 instruction with register (r) and full-vector memory
// (m) forms; $src1 is tied to $dst.
//   Op     - opcode byte.
//   OpStr  - full mnemonic.
//   OpNode - selection node taking ($src1, $src2, $src3).
//   sched  - scheduling class (sched for r, sched.Folded plus
//            sched.ReadAfterFold for m).
//   VTI    - vector type info.
12132multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12133                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12134  let Constraints = "$src1 = $dst",
12135      ExeDomain   = VTI.ExeDomain in {
12136    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12137                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12138                "$src3, $src2", "$src2, $src3",
12139                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12140                AVX512FMA3Base, Sched<[sched]>;
12141    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12142                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12143                "$src3, $src2", "$src2, $src3",
12144                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12145                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12146                AVX512FMA3Base,
12147                Sched<[sched.Folded, sched.ReadAfterFold]>;
12148  }
12149}
12150
// VBMI2_shift_var_rm plus a broadcast-memory form (mb).
// mb takes a scalar memory operand matched with VTI.BroadcastLdFrag, is
// encoded with EVEX_B and prints VTI.BroadcastStr after the memory operand.
// Parameters are the same as VBMI2_shift_var_rm.
12151multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12152                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12153         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12154  let Constraints = "$src1 = $dst",
12155      ExeDomain   = VTI.ExeDomain in
12156  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12157              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12158              "${src3}"#VTI.BroadcastStr#", $src2",
12159              "$src2, ${src3}"#VTI.BroadcastStr,
12160              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12161               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12162              AVX512FMA3Base, EVEX_B,
12163              Sched<[sched.Folded, sched.ReadAfterFold]>;
12164}
12165
// Instantiates VBMI2_shift_var_rm (no broadcast form) at 512/256/128 bits.
// The 512-bit variant requires HasVBMI2; the narrower variants also require
// HasVLX. sched and VTI supply the per-width scheduling class and type info.
12166multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12167                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12168  let Predicates = [HasVBMI2] in
12169  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12170                                   EVEX_V512;
12171  let Predicates = [HasVBMI2, HasVLX] in {
12172    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12173                                   EVEX_V256;
12174    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12175                                   EVEX_V128;
12176  }
12177}
12178
// Instantiates VBMI2_shift_var_rmb (including the broadcast form) at
// 512/256/128 bits. The 512-bit variant requires HasVBMI2; the narrower
// variants also require HasVLX.
12179multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12180                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12181  let Predicates = [HasVBMI2] in
12182  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12183                                    EVEX_V512;
12184  let Predicates = [HasVBMI2, HasVLX] in {
12185    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12186                                    EVEX_V256;
12187    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12188                                    EVEX_V128;
12189  }
12190}
// Word/dword/qword variants of a three-source VBMI2 instruction.
//   wOp    - opcode for the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants, which
//            are told apart by VEX_W.
//   Prefix - mnemonic stem; "w" / "d" / "q" is appended.
// The word variant has no broadcast form (it uses the _rm_ multiclass); the
// dword and qword variants do.
12191multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12192                           SDNode OpNode, X86SchedWriteWidths sched> {
12193  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12194             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12195  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12196             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12197  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12198             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12199}
12200
// Word/dword/qword variants of an immediate-operand VBMI2 instruction.
//   wOp    - opcode for the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants, which
//            are told apart by VEX_W.
//   Prefix - mnemonic stem; "w" / "d" / "q" is appended.
// W is built from avx512_common_3Op_rm_imm8 and D/Q from
// avx512_common_3Op_imm8 (both defined elsewhere in this file); only D/Q
// attach AVX512AIi8Base and EVEX_4V here.
12201multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12202                           SDNode OpNode, X86SchedWriteWidths sched> {
12203  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12204             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12205             VEX_W, EVEX_CD8<16, CD8VF>;
12206  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12207             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12208  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12209             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
12210}
12211
12212// Concat & Shift
// The three-source (V suffix) and immediate forms reuse the same opcode
// bytes (0x70/0x71 and 0x72/0x73); the two multiclasses attach different
// encoding base classes.
12213defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12214defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12215defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12216defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12217
12218// Compress
// Byte and word variants share opcode 0x63; the word variant adds VEX_W.
// Both are marked NotMemoryFoldable.
12219defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12220                                         avx512vl_i8_info, HasVBMI2>, EVEX,
12221                                         NotMemoryFoldable;
12222defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12223                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12224                                          NotMemoryFoldable;
12225// Expand
// Byte and word variants share opcode 0x62; the word variant adds VEX_W.
12226defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12227                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12228defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12229                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12230
12231//===----------------------------------------------------------------------===//
12232// VNNI
12233//===----------------------------------------------------------------------===//
12234
// VNNI three-source instruction with register (r), full-vector memory (m)
// and broadcast-memory (mb) forms; $src1 is tied to $dst for all of them via
// the enclosing let.
//   Op           - opcode byte.
//   OpStr        - mnemonic.
//   OpNode       - selection node taking ($src1, $src2, $src3).
//   sched        - scheduling class (sched for r, sched.Folded plus
//                  sched.ReadAfterFold for m/mb).
//   VTI          - vector type info.
//   IsCommutable - forwarded (twice) to AVX512_maskable_3src for the
//                  register form only; the memory forms do not pass it.
12235let Constraints = "$src1 = $dst" in
12236multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12237                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12238                    bit IsCommutable> {
12239  let ExeDomain = VTI.ExeDomain in {
12240  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12241                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12242                                   "$src3, $src2", "$src2, $src3",
12243                                   (VTI.VT (OpNode VTI.RC:$src1,
12244                                            VTI.RC:$src2, VTI.RC:$src3)),
12245                                   IsCommutable, IsCommutable>,
12246                                   EVEX_4V, T8PD, Sched<[sched]>;
12247  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12248                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12249                                   "$src3, $src2", "$src2, $src3",
12250                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12251                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12252                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12253                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: scalar memory operand matched with VTI.BroadcastLdFrag,
  // encoded with EVEX_B.
12254  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12255                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12256                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12257                                   "$src2, ${src3}"#VTI.BroadcastStr,
12258                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12259                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12260                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12261                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12262  }
12263}
12264
// Instantiates VNNI_rmb on 32-bit-element integer vectors at 512/256/128
// bits. The 512-bit variant requires HasVNNI; the narrower variants also
// require HasVLX. IsCommutable is passed through unchanged.
12265multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12266                       X86SchedWriteWidths sched, bit IsCommutable> {
12267  let Predicates = [HasVNNI] in
12268  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12269                           IsCommutable>, EVEX_V512;
12270  let Predicates = [HasVNNI, HasVLX] in {
12271    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12272                           IsCommutable>, EVEX_V256;
12273    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12274                           IsCommutable>, EVEX_V128;
12275  }
12276}
12277
12278// FIXME: Is there a better scheduler class for VPDP?
// The last argument is IsCommutable: the VPDPBUSD(S) forms are declared
// non-commutable (0) and the VPDPWSSD(S) forms commutable (1).
12279defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12280defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12281defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12282defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12283
// X86vpmaddwd restricted to nodes with exactly one use, so that folding it
// into another instruction does not leave a second user needing the
// original result.
12284def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
12285                             (X86vpmaddwd node:$lhs, node:$rhs), [{
12286  return N->hasOneUse();
12287}]>;
12288
12289// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// add(acc, single-use vpmaddwd(a, b)) is selected as VPDPWSSD acc, a, b.
// Each width has a register form and a form with the second multiplicand
// loaded from memory.
12290let Predicates = [HasVNNI] in {
12291  def : Pat<(v16i32 (add VR512:$src1,
12292                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12293            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12294  def : Pat<(v16i32 (add VR512:$src1,
12295                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12296            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12297}
// 256- and 128-bit variants additionally require VLX.
12298let Predicates = [HasVNNI,HasVLX] in {
12299  def : Pat<(v8i32 (add VR256X:$src1,
12300                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12301            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12302  def : Pat<(v8i32 (add VR256X:$src1,
12303                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12304            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12305  def : Pat<(v4i32 (add VR128X:$src1,
12306                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12307            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12308  def : Pat<(v4i32 (add VR128X:$src1,
12309                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12310            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12311}
12312
12313//===----------------------------------------------------------------------===//
12314// Bit Algorithms
12315//===----------------------------------------------------------------------===//
12316
12317// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count, selected from ctpop and gated on
// HasBITALG. Both share opcode 0x54; the word variant adds VEX_W.
12318defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12319                                   avx512vl_i8_info, HasBITALG>;
12320defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12321                                   avx512vl_i16_info, HasBITALG>, VEX_W;
12322
// Additional ctpop lowering patterns from avx512_unary_lowering (defined
// elsewhere in this file) for the same two instructions.
12323defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12324defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12325
// X86Vpshufbitqmb restricted to nodes with exactly one use; passed as the
// second pattern to AVX512_maskable_cmp in VPSHUFBITQMB_rm.
12326def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12327                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12328  return N->hasOneUse();
12329}]>;
12330
// VPSHUFBITQMB (opcode 0x8F) register (rr) and memory (rm) forms.
// The destination is a mask register (VTI.KRC). Each form supplies two
// patterns to AVX512_maskable_cmp: the plain node and its single-use
// variant X86Vpshufbitqmb_su (presumably for the masked form - confirm in
// AVX512_maskable_cmp).
//   sched - scheduling class (sched for rr, sched.Folded plus
//           sched.ReadAfterFold for rm).
//   VTI   - vector type info.
12331multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12332  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12333                                (ins VTI.RC:$src1, VTI.RC:$src2),
12334                                "vpshufbitqmb",
12335                                "$src2, $src1", "$src1, $src2",
12336                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12337                                (VTI.VT VTI.RC:$src2)),
12338                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12339                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12340                                Sched<[sched]>;
12341  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12342                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12343                                "vpshufbitqmb",
12344                                "$src2, $src1", "$src1, $src2",
12345                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12346                                (VTI.VT (VTI.LdFrag addr:$src2))),
12347                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12348                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12349                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12350                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12351}
12352
// Instantiates VPSHUFBITQMB_rm at 512/256/128 bits. The 512-bit variant
// requires HasBITALG; the narrower variants also require HasVLX.
12353multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12354  let Predicates = [HasBITALG] in
12355  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12356  let Predicates = [HasBITALG, HasVLX] in {
12357    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12358    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12359  }
12360}
12361
12362// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Only instantiated for byte-element vectors.
12363defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12364
12365//===----------------------------------------------------------------------===//
12366// GFNI
12367//===----------------------------------------------------------------------===//
12368
// EVEX-encoded GF2P8MULB on byte vectors at 512/256/128 bits, built from
// avx512_binop_rm with its last argument set to 1 (presumably the
// commutable flag - confirm in avx512_binop_rm).
// The 512-bit variant requires HasGFNI, HasAVX512 and HasBWI; the narrower
// variants require HasGFNI, HasVLX and HasBWI.
12369multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12370                                   X86SchedWriteWidths sched> {
12371  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12372  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12373                                EVEX_V512;
12374  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12375    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12376                                EVEX_V256;
12377    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12378                                EVEX_V128;
12379  }
12380}
12381
// EVEX-encoded vgf2p8mulb: opcode 0xCF, selected from X86GF2P8mulb, scheduled
// as SchedWriteVecALU. EVEX_CD8<8, CD8VF> and T8PD are applied to every
// width produced by GF2P8MULB_avx512_common.
12382defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12383                                          SchedWriteVecALU>,
12384                                          EVEX_CD8<8, CD8VF>, T8PD;
12385
// GFNI affine instruction forms for one vector width.
// Inherits the forms produced by avx512_3Op_rm_imm8 (instantiated with VTI
// for both of its VT-info arguments) and adds `rmbi`, a broadcast-memory
// form with an 8-bit immediate.
//   Op      - opcode byte.
//   OpStr   - assembly mnemonic.
//   OpNode  - SelectionDAG node matched by the pattern.
//   sched   - scheduling class; rmbi uses sched.Folded / sched.ReadAfterFold.
//   VTI     - type info of the result and of $src1.
//   BcstVTI - type info of the broadcast memory operand; supplies the
//             BroadcastStr decoration printed after ${src2} and the VT of
//             the X86VBroadcastld64 node.
12386multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12387                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12388                                      X86VectorVTInfo BcstVTI>
12389           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12390  let ExeDomain = VTI.ExeDomain in
  // The broadcast load is produced in BcstVTI.VT and bitconverted before
  // being used as the second operand of OpNode; EVEX_B marks the form.
12391  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12392                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12393                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12394                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12395                (OpNode (VTI.VT VTI.RC:$src1),
12396                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12397                 (i8 timm:$src3))>, EVEX_B,
12398                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12399}
12400
// Instantiates GF2P8AFFINE_avx512_rmb_imm at all three vector widths.
// Each width pairs a byte-vector type info (v64i8 / v32i8 / v16i8) with the
// 64-bit-element type info of the same width (v8i64 / v4i64 / v2i64) that is
// used for the broadcast operand.
// The 512-bit form requires HasGFNI + HasAVX512 + HasBWI; the 256/128-bit
// forms require HasGFNI + HasVLX + HasBWI.
12401multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12402                                     X86SchedWriteWidths sched> {
12403  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12404  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12405                                           v64i8_info, v8i64_info>, EVEX_V512;
12406  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12407    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12408                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12409    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12410                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12411  }
12412}
12413
// EVEX-encoded GFNI affine instructions. The two differ only in opcode
// (0xCF for vgf2p8affineinvqb, 0xCE for vgf2p8affineqb), mnemonic and
// SelectionDAG node; both use SchedWriteVecIMul and the same encoding
// classes (EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base).
12414defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12415                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12416                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12417defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12418                         X86GF2P8affineqb, SchedWriteVecIMul>,
12419                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12420
12421
12422//===----------------------------------------------------------------------===//
12423// AVX5124FMAPS
12424//===----------------------------------------------------------------------===//
12425
// AVX5124FMAPS instructions. All four are memory-source forms (MRMSrcMem
// with an f128mem $src3) built with AVX512_maskable_3src_in_asm, and all are
// given an empty pattern list ([]), so no selection pattern is attached here.
// Shared properties from the enclosing `let`: no unmodelled side effects,
// may load, $src1 tied to $dst, reads MXCSR, may raise FP exceptions.
12426let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12427    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
// Packed forms: 512-bit only (v16f32_info, VR512, EVEX_V512), CD8VQ.
12428defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12429                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12430                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12431                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12432                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12433
12434defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12435                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12436                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12437                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12438                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12439
// Scalar forms: f32x_info on VR128X, VEX_LIG, CD8VF, scalar FMA scheduling.
12440defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12441                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12442                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12443                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12444                    Sched<[SchedWriteFMA.Scl.Folded]>;
12445
12446defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12447                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12448                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12449                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12450                     Sched<[SchedWriteFMA.Scl.Folded]>;
12451}
12452
12453//===----------------------------------------------------------------------===//
12454// AVX5124VNNIW
12455//===----------------------------------------------------------------------===//
12456
// AVX5124VNNIW instructions. Both are 512-bit, memory-source forms
// (MRMSrcMem with an f128mem $src3) over v16i32_info, built with
// AVX512_maskable_3src_in_asm and an empty pattern list ([]).
// Shared properties from the enclosing `let`: no unmodelled side effects,
// may load, integer execution domain, $src1 tied to $dst.
// NOTE(review): both reuse the FMA scheduling class
// (SchedWriteFMA.ZMM.Folded); no integer-specific class is referenced here.
12457let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12458    Constraints = "$src1 = $dst" in {
12459defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12460                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12461                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12462                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12463                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12464
12465defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12466                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12467                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12468                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12469                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12470}
12471
// Pseudo instructions that store / load a VK16PAIR value to / from memory.
// Both have empty pattern lists and are flagged as having no unmodelled side
// effects. The store is marked mayStore and scheduled as WriteFStoreX; the
// load is marked mayLoad and scheduled as WriteFLoadX.
// NOTE(review): presumably these are expanded later into real mask
// loads/stores - the expansion is not visible in this section.
12472let hasSideEffects = 0 in {
12473  let mayStore = 1, SchedRW = [WriteFStoreX] in
12474  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12475  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12476  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12477}
12478
12479//===----------------------------------------------------------------------===//
12480// VP2INTERSECT
12481//===----------------------------------------------------------------------===//
12482
// Defines the three operand forms of vp2intersect for one vector type.
//   sched - scheduling class; memory forms use sched.Folded and
//           sched.ReadAfterFold.
//   _     - vector type info; _.Suffix is appended to the "vp2intersect"
//           mnemonic and _.KRPC (the mask register pair operand of
//           X86VectorVTInfo) is the destination.
// All forms use opcode 0x68 with EVEX_4V and T8XD, and select from
// X86vp2intersect.
12483multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Register-register form.
12484  def rr : I<0x68, MRMSrcReg,
12485                  (outs _.KRPC:$dst),
12486                  (ins _.RC:$src1, _.RC:$src2),
12487                  !strconcat("vp2intersect", _.Suffix,
12488                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12489                  [(set _.KRPC:$dst, (X86vp2intersect
12490                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12491                  EVEX_4V, T8XD, Sched<[sched]>;
12492
  // Register-memory form: $src2 is a full-vector load (_.LdFrag) that is
  // bitconverted to _.VT.
12493  def rm : I<0x68, MRMSrcMem,
12494                  (outs _.KRPC:$dst),
12495                  (ins  _.RC:$src1, _.MemOp:$src2),
12496                  !strconcat("vp2intersect", _.Suffix,
12497                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12498                  [(set _.KRPC:$dst, (X86vp2intersect
12499                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12500                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12501                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12502
  // Broadcast-memory form: $src2 is a scalar memory operand loaded through
  // _.BroadcastLdFrag; _.BroadcastStr decorates the operand in both asm
  // syntaxes and EVEX_B marks the form.
12503  def rmb : I<0x68, MRMSrcMem,
12504                  (outs _.KRPC:$dst),
12505                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12506                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12507                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12508                  [(set _.KRPC:$dst, (X86vp2intersect
12509                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12510                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12511                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12512}
12513
// Instantiates avx512_vp2intersect_modes at all three vector widths of `_`.
// The 512-bit form requires HasAVX512 + HasVP2INTERSECT; the 256/128-bit
// forms additionally require HasVLX.
12514multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12515  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12516    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12517
12518  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12519    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12520    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12521  }
12522}
12523
// vp2intersect over the i32 (D) and i64 (Q) vector type bundles, both
// scheduled as SchedWriteVecALU. Only the Q variant adds VEX_W.
12524defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12525defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12526
// Instantiates avx512_binop_rm2 at all three vector widths for a binary op
// whose source and destination vector types differ.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling classes (.ZMM/.YMM/.XMM).
//   _SrcVTInfo     - source type bundle; its per-width info is passed as both
//                    the first and the third VT-info argument of
//                    avx512_binop_rm2.
//   _DstVTInfo     - destination type bundle (second VT-info argument).
//   OpNode         - SelectionDAG node.
//   prd            - feature predicate; the 256/128-bit forms also require
//                    HasVLX.
//   IsCommutable   - forwarded unchanged to avx512_binop_rm2.
// Every width is tagged EVEX_CD8<32, CD8VF>, independent of the VT infos
// that are passed in.
12527multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12528                             X86SchedWriteWidths sched,
12529                             AVX512VLVectorVTInfo _SrcVTInfo,
12530                             AVX512VLVectorVTInfo _DstVTInfo,
12531                             SDNode OpNode, Predicate prd,
12532                             bit IsCommutable = 0> {
12533  let Predicates = [prd] in
12534    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12535                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12536                                   _SrcVTInfo.info512, IsCommutable>,
12537                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12538  let Predicates = [HasVLX, prd] in {
12539    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12540                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12541                                      _SrcVTInfo.info256, IsCommutable>,
12542                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12543    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12544                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12545                                      _SrcVTInfo.info128, IsCommutable>,
12546                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12547  }
12548}
12549
// vcvtne2ps2bf16: opcode 0x72, f32 vector sources, i16 vector destination,
// selected from X86cvtne2ps2bf16 under HasBF16, not commutable (last
// argument 0), T8XD. The scheduling class is a stand-in, as the inline
// FIXME below notes.
12550let ExeDomain = SSEPackedSingle in
12551defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12552                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12553                                        avx512vl_f32_info, avx512vl_i16_info,
12554                                        X86cvtneps2bf16_placeholder_do_not_use, HasBF16, 0>, T8XD;
12555
12556// Truncate Float to BFloat16
// Instantiates avx512_vcvt_fp for the f32 -> bf16 conversion at three source
// widths and adds "x"/"y"-suffixed assembler aliases.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling classes (.ZMM/.YMM/.XMM).
// All instantiations override Uses to an empty register list and clear
// mayRaiseFPException.
12557multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12558                             X86SchedWriteWidths sched> {
12559  let ExeDomain = SSEPackedSingle in {
  // 512-bit source (v16f32) -> v16i16 result; requires only HasBF16.
12560  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12561    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12562                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12563  }
12564  let Predicates = [HasBF16, HasVLX] in {
12565    let Uses = []<Register>, mayRaiseFPException = 0 in {
    // 128-bit source (v4f32) -> v8i16 result. Both pattern operators are
    // null_frag, so no selection patterns are generated by this defm; the
    // standalone Pat<> records after the VCVTNEPS2BF16 defm cover this
    // width. The mask class is given explicitly as VK4WM.
12566    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12567                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12568                               VK4WM>, EVEX_V128;
    // 256-bit source (v8f32) -> v8i16 result.
12569    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12570                               X86cvtneps2bf16, X86cvtneps2bf16,
12571                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12572    }
12573  } // Predicates = [HasBF16, HasVLX]
12574  } // ExeDomain = SSEPackedSingle
12575
  // Assembler aliases: OpcodeStr with an "x" suffix maps to the Z128 forms
  // and with a "y" suffix to the Z256 forms. The destination is VR128X in
  // every case. The memory-source aliases pass "intel" as the final
  // argument; the register-source ones do not.
12576  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12577                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12578                  VR128X:$src), 0>;
12579  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12580                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12581                  f128mem:$src), 0, "intel">;
12582  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12583                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12584                  VR256X:$src), 0>;
12585  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12586                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12587                  f256mem:$src), 0, "intel">;
12588}
12589
// vcvtneps2bf16: opcode 0x72, T8XS, EVEX_CD8<32, CD8VF>. It uses the same
// SchedWriteCvtPD2PS scheduling class as VCVTNE2PS2BF16.
12590defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12591                                       SchedWriteCvtPD2PS>, T8XS,
12592                                       EVEX_CD8<32, CD8VF>;
12593
// Selection patterns for the 128-bit VCVTNEPS2BF16 forms. For each of three
// source kinds (register, full-vector load, 32-bit broadcast load) there is
// an unmasked pattern on X86cvtneps2bf16 plus two masked patterns on
// X86mcvtneps2bf16: merge-masking with a pass-through $src0 (the *k
// instruction) and zero-masking with an all-zeros pass-through (the *kz
// instruction). The mask operand is VK4WM throughout.
12594let Predicates = [HasBF16, HasVLX] in {
12595  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12596  // patterns have been disabled with null_frag.
  // Register source.
12597  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12598            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12599  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12600                              VK4WM:$mask),
12601            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12602  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12603                              VK4WM:$mask),
12604            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12605
  // Full-vector load source (loadv4f32).
12606  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12607            (VCVTNEPS2BF16Z128rm addr:$src)>;
12608  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12609                              VK4WM:$mask),
12610            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12611  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12612                              VK4WM:$mask),
12613            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12614
  // Broadcast load source (X86VBroadcastld32 producing v4f32).
12615  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12616                                     (X86VBroadcastld32 addr:$src)))),
12617            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12618  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12619                              (v8i16 VR128X:$src0), VK4WM:$mask),
12620            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12621  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12622                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12623            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12624}
12625
// Three-source forms of a bf16 dot-product style op for one vector width.
// $src1 is tied to $dst for every form (enclosing Constraints).
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - SelectionDAG node taking ($src1, $src2, $src3).
//   sched          - scheduling class; memory forms use sched.Folded and
//                    sched.ReadAfterFold.
//   _              - type info of the accumulator/result ($src1/$dst).
//   src_v          - type info of the two multiplicand operands.
12626let Constraints = "$src1 = $dst" in {
12627multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12628                              X86FoldableSchedWrite sched,
12629                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  // Register form.
12630  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12631                           (ins src_v.RC:$src2, src_v.RC:$src3),
12632                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12633                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12634                           EVEX_4V, Sched<[sched]>;
12635
  // Memory form: $src3 is a full-vector load through src_v.LdFrag.
12636  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12637                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12638                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12639                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12640                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12641                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12642
  // Broadcast-memory form: $src3 is loaded through src_v.BroadcastLdFrag.
  // NOTE(review): the asm strings take the broadcast decoration from
  // _.BroadcastStr while the operand and pattern use src_v; this presumably
  // relies on both infos sharing the same broadcast string - confirm.
12643  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12644                  (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12645                  OpcodeStr,
12646                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12647                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12648                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12649                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12650                  EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12651
12652}
12653} // Constraints = "$src1 = $dst"
12654
// Instantiates avx512_dpbf16ps_rm at all three vector widths.
//   _     - accumulator/result type bundle.
//   src_v - multiplicand type bundle.
//   prd   - feature predicate; the 256/128-bit forms also require HasVLX.
12655multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12656                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12657                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12658  let Predicates = [prd] in {
12659    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12660                                   src_v.info512>, EVEX_V512;
12661  }
12662  let Predicates = [HasVLX, prd] in {
12663    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12664                                   src_v.info256>, EVEX_V256;
12665    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12666                                   src_v.info128>, EVEX_V128;
12667  }
12668}
12669
// vdpbf16ps: opcode 0x52, selected from X86dpbf16ps under HasBF16, scheduled
// as SchedWriteFMA. The accumulator/result uses the f32 vector bundle and
// the multiplicand operands use the i32 vector bundle. T8XS,
// EVEX_CD8<32, CD8VF>.
12670let ExeDomain = SSEPackedSingle in
12671defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12672                                       avx512vl_f32_info, avx512vl_i32_info,
12673                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12674