xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 63f537551380d2dab29fa402ad1269feae17e594)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// Group template arguments that can be derived from the vector type (EltNum x
16// EltVT).  These are things like the register class for the writemask, etc.
17// The idea is to pass one of these as the template argument rather than the
18// individual arguments.
19// The template is also used for scalar types, in this case numelts is 1.
20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                      string suffix = ""> {
  // Template arguments re-exported as fields.
22  RegisterClass RC = rc;
23  ValueType EltVT = eltvt;
24  int NumElts = numelts;
25
26  // Corresponding mask register class.
27  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
28
29  // Corresponding mask register pair class.
  // Left unset (?) when NumElts is greater than 16.
30  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
32
33  // Corresponding write-mask register class.
34  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
35
36  // The mask VT.
37  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
38
39  // Suffix used in the instruction mnemonic.
40  string Suffix = suffix;
41
42  // VTName is a string name for vector VT. For vector types it will be
43  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44  // It is a little bit complex for scalar types, where NumElts = 1.
45  // In this case the element count is chosen from the element size so that
  // the name is a 128-bit vector: 16-bit -> v8<elt>, 32-bit -> v4<elt>,
  // 64-bit -> v2<elt> (e.g. v8f16, v4f32 or v2f64).  Any other element size
  // falls back to NumElts.
46  string VTName = "v" # !if (!eq (NumElts, 1),
47                        !if (!eq (EltVT.Size, 16), 8,
48                        !if (!eq (EltVT.Size, 32), 4,
49                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
50
51  // The vector VT.
52  ValueType VT = !cast<ValueType>(VTName);
53
54  string EltTypeName = !cast<string>(EltVT);
55  // Size of the element type in bits as a string, e.g. "32" for v16i32.
  // Obtained by stripping the letters "b", "f" and "i" from EltTypeName.
56  string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
57  int EltSize = EltVT.Size;
58
59  // "i" for integer types and "f" for floating-point types
  // (the "b" of "bf16" is dropped, so bf16 also yields "f").
60  string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));
61
62  // Size of VT in bits, e.g. 512 for the VR512 types.  Note that this is
  // taken from VT rather than RC, so for the scalar records whose RC is
  // GR32/GR64 it is still the size of the vector type named by VTName.
63  int Size = VT.Size;
64
65  // The corresponding memory operand, e.g. i512mem for VR512.
66  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand named after the element type (e.g. f32mem); "b" is
  // stripped first, so bf16 resolves to f16mem.
67  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
68  // FP scalar memory operand for intrinsics - shmem (f16 and bf16), ssmem
  // (f32) or sdmem (f64).  Left unset (?) for every other element type.
69  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
70                           !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
71                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
72                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));
73
74  // Load patterns
75  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
76
77  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
78
  // Element-typed load fragment (bf16 resolves to loadf16) and the
  // broadcast-load fragment, which is keyed by element size only.
79  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
80  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
81
  // sse_load_* fragments for the FP element types; bf16 shares sse_load_f16.
  // Left unset (?) for every other element type.
82  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
83                               !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
84                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
85                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));
86
87  // The string to specify embedded broadcast in assembly.
88  string BroadcastStr = "{1to" # NumElts # "}";
89
90  // 8-bit compressed displacement tuple/subvector format.  The guard below
91  // tests NumElts < 16 (NumElts >> 4 == 0), i.e. NumElts <= 8 for the
  // power-of-two element counts used in this file; otherwise it is unset.
92  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
93                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
94
  // Sub-register index for 128-bit and 256-bit VTs; unset (?) for any other
  // size.
95  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
96                          !if (!eq (Size, 256), sub_ymm, ?));
97
  // f32, f16 and bf16 map to SSEPackedSingle, f64 to SSEPackedDouble, and
  // everything else to SSEPackedInt.
98  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
99                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
100                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
101                     !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
102                     SSEPackedInt))));
103
  // FR32X for f32, FR16X for f16 and bf16; every other element type
  // (including the integer ones) falls through to FR64X.
104  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
105                      !if (!eq (EltTypeName, "f16"), FR16X,
106                      !if (!eq (EltTypeName, "bf16"), FR16X,
107                      FR64X)));
108
  // All-zeros value of type VT.
109  dag ImmAllZerosV = (VT immAllZerosV);
110
  // Width-dependent suffix string: "Z128" or "Z256" for those sizes, "Z" for
  // any other size.
111  string ZSuffix = !if (!eq (Size, 128), "Z128",
112                   !if (!eq (Size, 256), "Z256", "Z"));
113}
114
// 512-bit vector types; all of them use RC = VR512.
115def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
116def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
117def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
118def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
119def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
120def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
121def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
122def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
123
124// "x" in v32i8x_info means RC = VR256X
// 256-bit vector types.
125def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
126def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
127def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
128def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
129def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
130def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
131def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
132def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
133
// 128-bit vector types; here the "x" in the record name means RC = VR128X.
134def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
135def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
136def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
137def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
138def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
139def v8bf16x_info : X86VectorVTInfo<8,  bf16, VR128X, "pbf">;
140def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
141def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
142
143// We map scalar types to the smallest (128-bit) vector type
144// with the appropriate element type. This allows the same masking logic to be used.
// Scalar records (NumElts = 1).  The integer ones use GR32/GR64 as RC, the
// floating-point ones use VR128X.
145def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
146def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
147def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
148def bf16x_info   : X86VectorVTInfo<1,  bf16, VR128X, "sbf">;
149def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
150def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
151
// Bundles three X86VectorVTInfo records so that a single template argument
// can carry the 512-, 256- and 128-bit variants together.
152class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
153                           X86VectorVTInfo i128> {
154  X86VectorVTInfo info512 = i512;
155  X86VectorVTInfo info256 = i256;
156  X86VectorVTInfo info128 = i128;
157}
158
// One bundle per element type, each listing the VR512, VR256X and VR128X
// records for that element type in that order.
159def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
160                                             v16i8x_info>;
161def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
162                                             v8i16x_info>;
163def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
164                                             v4i32x_info>;
165def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
166                                             v2i64x_info>;
167def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
168                                             v8f16x_info>;
169def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
170                                             v8bf16x_info>;
171def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
172                                             v4f32x_info>;
173def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
174                                             v2f64x_info>;
175
// Groups a mask register class, its write-mask register class and the mask
// value type under the same field names (KRC, KRCWM, KVT) that
// X86VectorVTInfo uses.
176class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
177                       ValueType _vt> {
178  RegisterClass KRC = _krc;
179  RegisterClass KRCWM = _krcwm;
180  ValueType KVT = _vt;
181}
182
// One record per mask width, from 1 to 64 elements.
183def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
184def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
185def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
186def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
187def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
188def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
189def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
190
191// Used for matching masked operations. Ensures the operation part only has a
192// single use.
// vselect guarded by a predicate: the fragment only matches when
// isProfitableToFormMaskedOp (defined outside this file section) accepts the
// select node N.
193def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
194                           (vselect node:$mask, node:$src1, node:$src2), [{
195  return isProfitableToFormMaskedOp(N);
196}]>;
197
// Same isProfitableToFormMaskedOp guard, applied to the X86selects node.
// This is the select operator the *_scalar maskable multiclasses pass down.
198def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
199                              (X86selects node:$mask, node:$src1, node:$src2), [{
200  return isProfitableToFormMaskedOp(N);
201}]>;
202
203// This multiclass generates the masking variants from the non-masking
204// variant.  It only provides the assembly pieces for the masking variants.
205// It assumes custom ISel patterns for masking which can be provided as
206// template arguments.
// Template arguments:
//   O, F               - first two arguments forwarded to AVX512<> (opcode
//                        and instruction format).
//   Outs               - output operand dag shared by all three variants.
//   Ins, MaskingIns, ZeroMaskingIns
//                      - input dags of NAME, NAME#k and NAME#kz.
//   OpcodeStr, AttSrcAsm, IntelSrcAsm
//                      - mnemonic plus the AT&T and Intel source operand
//                        strings used to build the asm string.
//   Pattern, MaskingPattern, ZeroMaskingPattern
//                      - ISel patterns of NAME, NAME#k and NAME#kz.
//   MaskingConstraint  - constraint applied to NAME#k only.
//   IsCommutable, IsKCommutable, IsKZCommutable
//                      - isCommutable of NAME, NAME#k and NAME#kz.
//   ClobberConstraint  - constraint applied to all three variants.
207multiclass AVX512_maskable_custom<bits<8> O, Format F,
208                                  dag Outs,
209                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210                                  string OpcodeStr,
211                                  string AttSrcAsm, string IntelSrcAsm,
212                                  list<dag> Pattern,
213                                  list<dag> MaskingPattern,
214                                  list<dag> ZeroMaskingPattern,
215                                  string MaskingConstraint = "",
216                                  bit IsCommutable = 0,
217                                  bit IsKCommutable = 0,
218                                  bit IsKZCommutable = IsCommutable,
219                                  string ClobberConstraint = ""> {
  // Unmasked variant.
220  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
221    def NAME: AVX512<O, F, Outs, Ins,
222                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
223                                     "$dst, "#IntelSrcAsm#"}",
224                       Pattern>;
225
  // Merge-masking variant; the asm string appends {${mask}} after $dst.
226  // Prefer over VMOV*rrk Pat<>
227  let isCommutable = IsKCommutable in
228    def NAME#k: AVX512<O, F, Outs, MaskingIns,
229                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
230                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
231                       MaskingPattern>,
232              EVEX_K {
233      // In case of the 3src subclass this is overridden with a let.
      // Otherwise: whichever of ClobberConstraint / MaskingConstraint is
      // non-empty, or both joined as "Clobber, Masking".
234      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
235                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
236                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
237    }
238
239  // Zero mask does not add any restrictions to commute operands transformation.
240  // So IsKZCommutable defaults to IsCommutable rather than to IsKCommutable.
  // The asm string appends {${mask}} {z} after $dst.
241  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
242      Constraints = ClobberConstraint in
243    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
244                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
245                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
246                       ZeroMaskingPattern>,
247              EVEX_KZ;
248}
249
250
251// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists for AVX512_maskable_custom from dags:
//   unmasked    : (set _.RC:$dst, RHS)
//   masked      : (set _.RC:$dst, MaskingRHS)   -- supplied by the caller
//   zero-masked : (set _.RC:$dst, (Select mask, RHS, all-zeros))
// The remaining arguments are forwarded unchanged.
252multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
253                                  dag Outs,
254                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
255                                  string OpcodeStr,
256                                  string AttSrcAsm, string IntelSrcAsm,
257                                  dag RHS, dag MaskingRHS,
258                                  SDPatternOperator Select = vselect_mask,
259                                  string MaskingConstraint = "",
260                                  bit IsCommutable = 0,
261                                  bit IsKCommutable = 0,
262                                  bit IsKZCommutable = IsCommutable,
263                                  string ClobberConstraint = ""> :
264  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
265                         AttSrcAsm, IntelSrcAsm,
266                         [(set _.RC:$dst, RHS)],
267                         [(set _.RC:$dst, MaskingRHS)],
268                         [(set _.RC:$dst,
269                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
270                         MaskingConstraint, IsCommutable,
271                         IsKCommutable, IsKZCommutable, ClobberConstraint>;
272
273// This multiclass generates the unconditional/non-masking, the masking and
274// the zero-masking variant of the vector instruction.  In the masking case, the
275// preserved vector elements come from a new dummy input operand tied to $dst.
276// This version uses a separate dag for non-masking and masking.
// RHS is used for the unmasked pattern and MaskRHS for both masked patterns,
// which are always wrapped in vselect_mask.  The masked variant prepends
// ($src0, $mask) to Ins and ties $src0 to $dst; the zero-masked variant
// prepends only $mask.  Note that ClobberConstraint comes before the
// commutable flags in this argument list.
277multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
278                           dag Outs, dag Ins, string OpcodeStr,
279                           string AttSrcAsm, string IntelSrcAsm,
280                           dag RHS, dag MaskRHS,
281                           string ClobberConstraint = "",
282                           bit IsCommutable = 0, bit IsKCommutable = 0,
283                           bit IsKZCommutable = IsCommutable> :
284   AVX512_maskable_custom<O, F, Outs, Ins,
285                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
286                          !con((ins _.KRCWM:$mask), Ins),
287                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
288                          [(set _.RC:$dst, RHS)],
289                          [(set _.RC:$dst,
290                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
291                          [(set _.RC:$dst,
292                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
293                          "$src0 = $dst", IsCommutable, IsKCommutable,
294                          IsKZCommutable, ClobberConstraint>;
295
296// This multiclass generates the unconditional/non-masking, the masking and
297// the zero-masking variant of the vector instruction.  In the masking case, the
298// preserved vector elements come from a new dummy input operand tied to $dst.
// The same RHS is used for all three variants.  The masked variant prepends
// ($src0, $mask) to Ins, selects between RHS and $src0 with Select, and ties
// $src0 to $dst; the zero-masked variant prepends only $mask.
299multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
300                           dag Outs, dag Ins, string OpcodeStr,
301                           string AttSrcAsm, string IntelSrcAsm,
302                           dag RHS,
303                           bit IsCommutable = 0, bit IsKCommutable = 0,
304                           bit IsKZCommutable = IsCommutable,
305                           SDPatternOperator Select = vselect_mask,
306                           string ClobberConstraint = ""> :
307   AVX512_maskable_common<O, F, _, Outs, Ins,
308                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
309                          !con((ins _.KRCWM:$mask), Ins),
310                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
311                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
312                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
313                          IsKZCommutable, ClobberConstraint>;
314
315// This multiclass generates the unconditional/non-masking, the masking and
316// the zero-masking variant of the scalar instruction.
// Forwards to AVX512_maskable with all three commutable flags set to 0 and
// X86selects_mask as the select operator.
317multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
318                           dag Outs, dag Ins, string OpcodeStr,
319                           string AttSrcAsm, string IntelSrcAsm,
320                           dag RHS> :
321   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
322                   RHS, 0, 0, 0, X86selects_mask>;
323
324// Similar to AVX512_maskable but in this case one of the source operands
325// ($src1) is already tied to $dst so we just use that for the preserved
326// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
327// $src1.
// The masked and zero-masked variants share the same input dag
// ($src1, $mask, NonTiedIns).  No masking constraint is passed here ("").
// When MaskOnly is set, null_frag replaces RHS in the argument forwarded to
// AVX512_maskable_common, while the merge-masking dag still uses RHS itself.
328multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
329                                dag Outs, dag NonTiedIns, string OpcodeStr,
330                                string AttSrcAsm, string IntelSrcAsm,
331                                dag RHS,
332                                bit IsCommutable = 0,
333                                bit IsKCommutable = 0,
334                                SDPatternOperator Select = vselect_mask,
335                                bit MaskOnly = 0> :
336   AVX512_maskable_common<O, F, _, Outs,
337                          !con((ins _.RC:$src1), NonTiedIns),
338                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
339                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
340                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
341                          !if(MaskOnly, (null_frag), RHS),
342                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
343                          Select, "", IsCommutable, IsKCommutable>;
344
345// Similar to AVX512_maskable_3src but in this case the input VT for the tied
346// operand differs from the output VT. This requires a bitconvert on
347// the preserved vector going into the vselect.
348// NOTE: The unmasked pattern is disabled.
// The input operands ($src1 and $mask) take their register classes from
// InVT, while the result register class and the zero vector used by
// AVX512_maskable_common come from OutVT.  null_frag is passed as the
// unmasked RHS and the select operator is fixed to vselect_mask.
349multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
350                                     X86VectorVTInfo InVT,
351                                     dag Outs, dag NonTiedIns, string OpcodeStr,
352                                     string AttSrcAsm, string IntelSrcAsm,
353                                     dag RHS, bit IsCommutable = 0> :
354   AVX512_maskable_common<O, F, OutVT, Outs,
355                          !con((ins InVT.RC:$src1), NonTiedIns),
356                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
357                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
358                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
359                          (vselect_mask InVT.KRCWM:$mask, RHS,
360                           (bitconvert InVT.RC:$src1)),
361                           vselect_mask, "", IsCommutable>;
362
// Scalar form of AVX512_maskable_3src: forwards every argument unchanged and
// passes X86selects_mask as the select operator.
363multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
364                                     dag Outs, dag NonTiedIns, string OpcodeStr,
365                                     string AttSrcAsm, string IntelSrcAsm,
366                                     dag RHS,
367                                     bit IsCommutable = 0,
368                                     bit IsKCommutable = 0,
369                                     bit MaskOnly = 0> :
370   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
371                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
372                        X86selects_mask, MaskOnly>;
373
// Defines the same three variants as AVX512_maskable, but only the unmasked
// one gets an ISel pattern (Pattern); the masked and zero-masked variants are
// given empty pattern lists.  $src0 is tied to $dst in the masked variant.
374multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
375                                  dag Outs, dag Ins,
376                                  string OpcodeStr,
377                                  string AttSrcAsm, string IntelSrcAsm,
378                                  list<dag> Pattern> :
379   AVX512_maskable_custom<O, F, Outs, Ins,
380                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
381                          !con((ins _.KRCWM:$mask), Ins),
382                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
383                          "$src0 = $dst">;
384
// Three-source counterpart of AVX512_maskable_in_asm: $src1 is prepended to
// NonTiedIns, the masked and zero-masked variants share one input dag and get
// empty pattern lists, and no masking constraint is passed ("").
385multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
386                                       dag Outs, dag NonTiedIns,
387                                       string OpcodeStr,
388                                       string AttSrcAsm, string IntelSrcAsm,
389                                       list<dag> Pattern> :
390   AVX512_maskable_custom<O, F, Outs,
391                          !con((ins _.RC:$src1), NonTiedIns),
392                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
393                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
394                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
395                          "">;
396
397// Instruction with mask that puts result in mask register,
398// like "compare" and "vptest"
// Defines only two variants, NAME and NAME#k; there is no zero-masking (kz)
// variant and no constraint string.  Both variants share IsCommutable.
399multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
400                                  dag Outs,
401                                  dag Ins, dag MaskingIns,
402                                  string OpcodeStr,
403                                  string AttSrcAsm, string IntelSrcAsm,
404                                  list<dag> Pattern,
405                                  list<dag> MaskingPattern,
406                                  bit IsCommutable = 0> {
407    let isCommutable = IsCommutable in {
    // Unmasked variant.
408    def NAME: AVX512<O, F, Outs, Ins,
409                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
410                                     "$dst, "#IntelSrcAsm#"}",
411                       Pattern>;
412
    // Masked variant; the asm string appends {${mask}} after $dst.
413    def NAME#k: AVX512<O, F, Outs, MaskingIns,
414                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
415                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
416                       MaskingPattern>, EVEX_K;
417    }
418}
419
// Wraps RHS and MaskingRHS into patterns whose destination is the mask
// register class _.KRC, then forwards to AVX512_maskable_custom_cmp.
420multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
421                                  dag Outs,
422                                  dag Ins, dag MaskingIns,
423                                  string OpcodeStr,
424                                  string AttSrcAsm, string IntelSrcAsm,
425                                  dag RHS, dag MaskingRHS,
426                                  bit IsCommutable = 0> :
427  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
428                         AttSrcAsm, IntelSrcAsm,
429                         [(set _.KRC:$dst, RHS)],
430                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
431
// The masked variant prepends $mask to Ins and matches (and $mask, RHS_su).
// RHS_su is a separate dag supplied by the caller for the masked pattern;
// RHS is used for the unmasked pattern only.
432multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
433                           dag Outs, dag Ins, string OpcodeStr,
434                           string AttSrcAsm, string IntelSrcAsm,
435                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
436   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
437                          !con((ins _.KRCWM:$mask), Ins),
438                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
439                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
440
441// Used by conversion instructions.
// All three input dags and all three RHS dags are supplied by the caller;
// each RHS is wrapped as (set _.RC:$dst, ...).  The masked variant is given
// the constraint "$src0 = $dst".
// NOTE(review): this presumably requires the caller's MaskingIns to declare
// an operand named $src0 -- confirm at the use sites.
442multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
443                                  dag Outs,
444                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
445                                  string OpcodeStr,
446                                  string AttSrcAsm, string IntelSrcAsm,
447                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
448  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
449                         AttSrcAsm, IntelSrcAsm,
450                         [(set _.RC:$dst, RHS)],
451                         [(set _.RC:$dst, MaskingRHS)],
452                         [(set _.RC:$dst, ZeroMaskingRHS)],
453                         "$src0 = $dst">;
454
// Three-source form with separate dags for the unmasked (RHS) and masked
// (MaskingRHS) patterns.  Both masked patterns use vselect_mask: the masked
// one falls back to $src1, the zero-masked one to the all-zeros vector.
// IsCommutable and IsKCommutable have no defaults here and must be passed.
455multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
456                               dag Outs, dag NonTiedIns, string OpcodeStr,
457                               string AttSrcAsm, string IntelSrcAsm,
458                               dag RHS, dag MaskingRHS, bit IsCommutable,
459                               bit IsKCommutable> :
460   AVX512_maskable_custom<O, F, Outs,
461                          !con((ins _.RC:$src1), NonTiedIns),
462                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
463                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
464                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
465                          [(set _.RC:$dst, RHS)],
466                          [(set _.RC:$dst,
467                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
468                          [(set _.RC:$dst,
469                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
470                          "", IsCommutable, IsKCommutable>;
471
472// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
473// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
474// swizzled by ExecutionDomainFix to pxor.
475// We set canFoldAsLoad because this can be converted to a constant-pool
476// load of an all-zeros value if folding it would be beneficial.
// Both pseudos take no inputs and are matched on the v16i32 form of the
// constant; AVX512_512_SETALLONES is the all-ones counterpart and is defined
// under the same flags as AVX512_512_SET0.
477let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
478    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
479def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
480               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
481def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
482               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
483}
484
// Select AVX512_512_SET0 for all-zeros vectors of the other 512-bit types
// listed here; the v16i32 case is the pseudo's own pattern.
485let Predicates = [HasAVX512] in {
486def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
487def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
488def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
489def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
490def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
491def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
492}
493
494// Alias instructions that allow VPTERNLOG to be used with a mask to create
495// a mix of all ones and all zeros elements. This is done this way to force
496// the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask and matches a plain vselect between an
// all-ones and an all-zeros vector.
497let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 16 x i32 elements selected by a VK16WM mask.
498def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
499                                (ins VK16WM:$mask), "",
500                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
501                                                      (v16i32 immAllOnesV),
502                                                      (v16i32 immAllZerosV)))]>;
// 8 x i64 elements selected by a VK8WM mask.
503def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
504                                (ins VK8WM:$mask), "",
505                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
506                                           (v8i64 immAllOnesV),
507                                           (v8i64 immAllZerosV)))]>;
508}
509
// Zero pseudos for the VR128X and VR256X register classes, matched on the
// v4i32 and v8i32 forms of the constant.  They carry the same flags as
// AVX512_512_SET0 and are predicated on HasAVX512.
510let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
511    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
512def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
513               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
514def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
515               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
516}
517
// Select AVX512_128_SET0 / AVX512_256_SET0 for all-zeros vectors of the other
// 128-bit and 256-bit types listed here; the v4i32 and v8i32 cases are the
// pseudos' own patterns.
518let Predicates = [HasAVX512] in {
519def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
520def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
521def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
522def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
523def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
524def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
525def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
526def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
527def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
528def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
529def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
530def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
531}
532
533// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
534// This is expanded by ExpandPostRAPseudos.
// One pseudo per scalar FP zero constant: fp16imm0 into FR16X, fp32imm0 into
// FR32X, fp64imm0 into FR64X and fp128imm0 into VR128X.
535let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
536    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
537  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
538                          [(set FR16X:$dst, fp16imm0)]>;
539  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
540                          [(set FR32X:$dst, fp32imm0)]>;
541  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
542                          [(set FR64X:$dst, fp64imm0)]>;
543  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
544                            [(set VR128X:$dst, fp128imm0)]>;
545}
546
547//===----------------------------------------------------------------------===//
548// AVX-512 - VECTOR INSERT
549//
550
551// Supports two different pattern operators for mask and unmasked ops. Allows
552// null_frag to be passed for one.
// Template arguments:
//   Opcode           - opcode forwarded to AVX512_maskable_split.
//   From             - info of the subvector being inserted ($src2).
//   To               - info of the destination vector ($src1 and $dst).
//   vinsert_insert   - operator used in the unmasked pattern.
//   vinsert_for_mask - operator used in the masked patterns.
//   sched            - scheduling class; the memory form uses sched.Folded
//                      and sched.ReadAfterFold.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts, e.g.
// "vinsertf32x4" when From is v4f32x_info.
553multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
554                                  X86VectorVTInfo To,
555                                  SDPatternOperator vinsert_insert,
556                                  SDPatternOperator vinsert_for_mask,
557                                  X86FoldableSchedWrite sched> {
558  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form: the subvector comes from a From.RC register.
559    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
560                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
561                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
562                   "$src3, $src2, $src1", "$src1, $src2, $src3",
563                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
564                                         (From.VT From.RC:$src2),
565                                         (iPTR imm)),
566                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
567                                           (From.VT From.RC:$src2),
568                                           (iPTR imm))>,
569                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: the subvector is loaded through From.LdFrag, and the
    // compressed-displacement settings are taken from the From type.
570    let mayLoad = 1 in
571    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
572                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
573                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
574                   "$src3, $src2, $src1", "$src1, $src2, $src3",
575                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
576                               (From.VT (From.LdFrag addr:$src2)),
577                               (iPTR imm)),
578                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
579                               (From.VT (From.LdFrag addr:$src2)),
580                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
581                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
582                   Sched<[sched.Folded, sched.ReadAfterFold]>;
583  }
584}
585
// Convenience wrapper around vinsert_for_size_split that forwards one pattern
// operator, vinsert_insert, for both the unmasked and the masked ops.
multiclass vinsert_for_size<int Opcode,
                            X86VectorVTInfo From, X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched>
    : vinsert_for_size_split<Opcode, From, To,
                             vinsert_insert, vinsert_insert, sched>;
592
// Selection patterns that map vinsert_insert on the From/To type pair onto
// the already-defined instruction InstrStr ("rr" and "rm" forms). The
// immediate operand is produced from the matched node by
// INSERT_get_vinsert_imm. Patterns are guarded by the predicate list p.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To,
                                     PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    // Subvector in a register.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT From.RC:$src2),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         To.RC:$src1, From.RC:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Subvector loaded from memory.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT (From.LdFrag addr:$src2)),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                         To.RC:$src1, addr:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
612
// Instantiates every subvector-insert instruction for one element domain.
// EltVT32/EltVT64 are the 32-bit and 64-bit element types; Opcode128 and
// Opcode256 are the opcodes used for the 128-bit and 256-bit subvector forms.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 128-bit subvector of 32-bit elements into a 256-bit vector.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256"
        : vinsert_for_size<Opcode128,
                           X86VectorVTInfo< 4, EltVT32, VR128X>,
                           X86VectorVTInfo< 8, EltVT32, VR256X>,
                           vinsert128_insert, sched>,
          EVEX_V256;

  // 128-bit subvector of 32-bit elements into a 512-bit vector.
  defm NAME # "32x4Z"
      : vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                         X86VectorVTInfo<16, EltVT32, VR512>,
                         vinsert128_insert, sched>,
        EVEX_V512;

  // 256-bit subvector of 64-bit elements into a 512-bit vector.
  defm NAME # "64x4Z"
      : vinsert_for_size<Opcode256,
                         X86VectorVTInfo< 4, EltVT64, VR256X>,
                         X86VectorVTInfo< 8, EltVT64, VR512>,
                         vinsert256_insert, sched>,
        VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
          VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
          VEX_W, EVEX_V512;

    defm NAME # "32x8Z"
        : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
          EVEX_V512;
  }
}
656
// Floating-point (VINSERTF*) and integer (VINSERTI*) subvector inserts.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18,
                                 f64, 0x1a,
                                 WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38,
                                 i64, 0x3a,
                                 WriteShuffle256>;
660
// Codegen patterns for the alternative element types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit element vectors, 128-bit subvector into 256-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;

// 64-bit element vectors, 128-bit subvector into 512-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;

// 32-bit element vectors, 256-bit subvector into 512-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v8f16x_info, v16f16x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;

// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v8f16x_info, v32f16_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v16f16x_info, v32f16_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
699
700
// Additional patterns for handling a bitcast between the vselect and the
// subvector insert. The insert is performed on the From/To types, while the
// mask select (and therefore the mask register class and the pass-through
// operand) uses the Cast type. Four patterns are emitted for InstrStr:
//   rrk  - register subvector, merge with $src0
//   rmk  - loaded subvector,   merge with $src0
//   rrkz - register subvector, zero masking
//   rmkz - loaded subvector,   zero masking
// The immediate is produced from the matched insert node by
// INSERT_get_vinsert_imm. Patterns are guarded by the predicate list p.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // The load is matched directly as From.VT, exactly as in the rmkz pattern
  // below. Wrapping From.LdFrag in a bitconvert to the same type would
  // describe a no-op bitcast, so that form could not fold the load.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
752
// Masked-insert patterns where the insert type and the mask-select type
// differ. The instruction is chosen by the Cast (select) type: 32-bit element
// casts use the 32x4/32x8 forms, 64-bit element casts use the 64x2/64x4 forms,
// and integer types use the VINSERTI* instructions.

// 128-bit subvector into 256-bit vector, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// 128-bit subvector into 256-bit vector, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer types select the integer instruction, matching the VINSERTI64x2Z
// entries used for the 512-bit case below.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// 128-bit subvector into 512-bit vector, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// 128-bit subvector into 512-bit vector, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// 256-bit subvector into 512-bit vector, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// 256-bit subvector into 512-bit vector, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
830
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
  // Register form; $src2 is a full XMM register.
  let isCommutable = 1 in
  def VINSERTPSZrr
      : AVX512AIi8<0x21, MRMSrcReg,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
                   "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR128X:$dst,
                         (X86insertps VR128X:$src1, VR128X:$src2,
                                      timm:$src3))]>,
        EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;

  // Memory form; $src2 is a scalar f32 load turned into a v4f32.
  def VINSERTPSZrm
      : AVX512AIi8<0x21, MRMSrcMem,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
                   "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR128X:$dst,
                         (X86insertps VR128X:$src1,
                                      (v4f32 (scalar_to_vector
                                              (loadf32 addr:$src2))),
                                      timm:$src3))]>,
        EVEX_4V, EVEX_CD8<32, CD8VT1>,
        Sched<[SchedWriteFShuffle.XMM.Folded,
               SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
848
849//===----------------------------------------------------------------------===//
850// AVX-512 VECTOR EXTRACT
851//---
852
// Register (rr), store (mr) and masked-store (mrk) forms of a subvector
// extract. Separate pattern operators are taken for the unmasked and the
// masked ops; null_frag may be passed for one of them. SchedRR is the
// scheduling class of the register form, SchedMR that of both store forms.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let ExeDomain = To.ExeDomain, hasSideEffects = 0 in {
    // Extract into a register.
    defm rr : AVX512_maskable_split<
                  Opcode, MRMDestReg, To,
                  (outs To.RC:$dst),
                  (ins From.RC:$src1, u8imm:$idx),
                  "vextract" # To.EltTypeName # "x" # To.NumElts,
                  "$idx, $src1", "$src1, $idx",
                  (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                  (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
              AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Extract and store; selected from a store of the unmasked extract.
    def mr : AVX512AIi8<Opcode, MRMDestMem,
                        (outs),
                        (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                        "vextract" # To.EltTypeName # "x" # To.NumElts #
                            "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                        [(store (To.VT (vextract_extract:$idx
                                        (From.VT From.RC:$src1), (iPTR imm))),
                                addr:$dst)]>,
             EVEX, Sched<[SchedMR]>;

    // Masked extract and store. It has no selection pattern, so mayStore is
    // stated explicitly.
    let hasSideEffects = 0, mayStore = 1 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem,
                         (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                             "\t{$idx, $src1, $dst {${mask}}|"
                             "$dst {${mask}}, $src1, $idx}",
                         []>,
              EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
889
// Convenience wrapper around vextract_for_size_split that forwards one
// pattern operator, vextract_extract, for both the unmasked and masked ops.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              vextract_extract, vextract_extract,
                              SchedRR, SchedMR>;
896
// Codegen patterns for the alternative types: map vextract_extract on the
// From/To type pair onto the already-defined instruction InstrStr ("rr" for
// a register result, "mr" when the result is stored). The immediate is
// produced from the matched node by EXTRACT_get_vextract_imm. Patterns are
// guarded by the predicate list p.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    // Extract to a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract folded into a store.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr#"mr")
                   addr:$dst, From.RC:$src1,
                   (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
912
// Instantiates every subvector-extract instruction for one element domain.
// EltVT32/EltVT64 are the 32-bit and 64-bit element types; Opcode128 and
// Opcode256 are the opcodes used for the 128-bit and 256-bit subvector forms.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    // 128-bit subvector of 32-bit elements out of a 512-bit vector.
    defm NAME # "32x4Z"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT4>;

    // 256-bit subvector of 64-bit elements out of a 512-bit vector.
    defm NAME # "64x4Z"
        : vextract_for_size<Opcode256,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            vextract256_extract, SchedRR, SchedMR>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 128-bit subvector of 32-bit elements out of a 256-bit vector.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, SchedRR, SchedMR>,
          EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 8, EltVT64, VR512>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract,
                                  SchedRR, SchedMR>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z"
        : vextract_for_size_split<Opcode256,
                                  X86VectorVTInfo<16, EltVT32, VR512>,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
                                  null_frag, vextract256_extract,
                                  SchedRR, SchedMR>,
          EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
957
// Floating-point (VEXTRACTF*) and integer (VEXTRACTI*) subvector extracts.
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19,
                                   f64, 0x1b,
                                   WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39,
                                   i64, 0x3b,
                                   WriteShuffle256, WriteVecStore>;
961
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit element vectors, 128-bit subvector out of a 512-bit vector.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit element vectors, 256-bit subvector out of a 512-bit vector.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 64-bit element vectors, 128-bit subvector out of a 256-bit vector.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v16f16x_info, v8f16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v32f16_info, v8f16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v32f16_info, v16f16x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
1001
1002
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Under NoVLX each pattern takes the sub_ymm sub-register of the 512-bit
// source and extracts its 128-bit half at index 1 with VEXTRACTI128rr
// (integer types) or VEXTRACTF128rr (floating-point types).
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF128rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1035
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Under HasVLX each pattern takes the sub_ymm sub-register of the 512-bit
// source and extracts its 128-bit half at index 1 with VEXTRACTI32x4Z256rr
// (integer types) or VEXTRACTF32x4Z256rr (floating-point types).
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF32x4Z256rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1068
1069
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector. The extract is performed on the From/To types, while the
// mask select (and therefore the mask register class and the pass-through
// operand) uses the Cast type. Two patterns are emitted for InstrStr:
//   rrk  - merge with the pass-through $src0
//   rrkz - zero masking
// The immediate is produced from the matched extract node by
// EXTRACT_get_vextract_imm. Patterns are guarded by the predicate list p.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // The pass-through value is an operand of the Cast.VT select, so it is
  // bound with Cast.RC here just as it is in the output instruction.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1097
1098defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1099                              v4f32x_info, vextract128_extract,
1100                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1101defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1102                              v2f64x_info, vextract128_extract,
1103                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1104
1105defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1106                              v4i32x_info, vextract128_extract,
1107                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1108defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1109                              v4i32x_info, vextract128_extract,
1110                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1111defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1112                              v4i32x_info, vextract128_extract,
1113                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1114defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1115                              v2i64x_info, vextract128_extract,
1116                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1117defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1118                              v2i64x_info, vextract128_extract,
1119                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1120defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1121                              v2i64x_info, vextract128_extract,
1122                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1123
// vextract_for_mask_cast instantiations for 128-bit subvector extracts from
// 512-bit sources. These use the vextract128_extract fragment and
// EXTRACT_get_vextract128_imm; the 32x4 forms are predicated on HasAVX512 and
// the 64x2 forms on HasDQI.
1124defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1125                              v4f32x_info, vextract128_extract,
1126                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1127defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1128                              v2f64x_info, vextract128_extract,
1129                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1130
// Integer variants of the same 512-bit -> 128-bit extracts.
1131defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1132                              v4i32x_info, vextract128_extract,
1133                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1134defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1135                              v4i32x_info, vextract128_extract,
1136                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1137defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1138                              v4i32x_info, vextract128_extract,
1139                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1140defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1141                              v2i64x_info, vextract128_extract,
1142                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1143defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1144                              v2i64x_info, vextract128_extract,
1145                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1146defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1147                              v2i64x_info, vextract128_extract,
1148                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1149
// vextract_for_mask_cast instantiations for 256-bit subvector extracts from
// 512-bit sources. These use the vextract256_extract fragment and
// EXTRACT_get_vextract256_imm; here the 32x8 forms are the ones predicated on
// HasDQI, while the 64x4 forms need only HasAVX512.
1150defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1151                              v8f32x_info, vextract256_extract,
1152                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1153defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1154                              v4f64x_info, vextract256_extract,
1155                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1156
// Integer variants of the same 512-bit -> 256-bit extracts.
1157defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1158                              v8i32x_info, vextract256_extract,
1159                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1160defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1161                              v8i32x_info, vextract256_extract,
1162                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1163defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1164                              v8i32x_info, vextract256_extract,
1165                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1166defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1167                              v4i64x_info, vextract256_extract,
1168                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1169defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1170                              v4i64x_info, vextract256_extract,
1171                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1172defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1173                              v4i64x_info, vextract256_extract,
1174                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1175
1176// vextractps - extract 32 bits from XMM
// Register-destination form (opcode 0x17, MRMDestReg). The selection pattern
// views the v4f32 source as v4i32 via bc_v4i32 and extracts the element
// selected by the immediate $src2 into a GR32orGR64 destination.
1177def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1178      (ins VR128X:$src1, u8imm:$src2),
1179      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1180      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1181      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1182
// Memory-destination form of vextractps (opcode 0x17, MRMDestMem). It has no
// outs; the pattern stores the extracted element (same bc_v4i32/extractelt
// shape as the register form) to addr:$dst. Unlike the register form it also
// carries EVEX_CD8<32, CD8VT1> and uses the WriteVecExtractSt sched class.
1183def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1184      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1185      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1186      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1187                          addr:$dst)]>,
1188      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1189
1190//===---------------------------------------------------------------------===//
1191// AVX-512 BROADCAST
1192//---
1193// broadcast with a scalar argument.
// Defines patterns only (no instructions). Each pattern matches X86VBroadcast
// of a scalar held in SrcInfo.FRC and selects the instruction named
// Name # DestInfo.ZSuffix # {rr, rrk, rrkz}, first moving the scalar into
// SrcInfo.RC with COPY_TO_REGCLASS.
1194multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1195                                   X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast -> rr.
1196  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1197            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1198             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Broadcast selected against a pass-through value $src0 -> rrk.
1199  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1200                                       (X86VBroadcast SrcInfo.FRC:$src),
1201                                       DestInfo.RC:$src0)),
1202            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1203             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1204             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Broadcast selected against an all-zeros vector -> rrkz.
1205  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1206                                       (X86VBroadcast SrcInfo.FRC:$src),
1207                                       DestInfo.ImmAllZerosV)),
1208            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1209             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1210}
1211
1212// Split version to allow mask and broadcast node to be different types. This
1213// helps support the 32x2 broadcasts.
//
// Defines six instruction forms: rr, rrkz, rrk (register source) and rm,
// rmkz, rmk (scalar memory source).
//   MaskInfo - supplies the result register class, the write-mask class and
//              the VT the pattern result is bitconverted to.
//   DestInfo - supplies the VT produced by the broadcast node and the
//              execution domain.
//   SrcInfo  - supplies the source register class, the scalar memory operand,
//              the broadcast-load fragment and the EVEX_CD8 element size.
//   UnmaskedOp / UnmaskedBcastOp - nodes used only in the unmasked rr / rm
//              patterns; the masked forms always use X86VBroadcast and
//              SrcInfo.BroadcastLdFrag directly.
//   IsConvertibleToThreeAddress - applied to the rmk form only.
1214multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1215                                     SchedWrite SchedRR, SchedWrite SchedRM,
1216                                     X86VectorVTInfo MaskInfo,
1217                                     X86VectorVTInfo DestInfo,
1218                                     X86VectorVTInfo SrcInfo,
1219                                     bit IsConvertibleToThreeAddress,
1220                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1221                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Unmasked register form. hasSideEffects is set explicitly on this def.
1222  let hasSideEffects = 0 in
1223  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1224                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1225                    [(set MaskInfo.RC:$dst,
1226                      (MaskInfo.VT
1227                       (bitconvert
1228                        (DestInfo.VT
1229                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1230                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Zero-masked register form: select between the broadcast and all-zeros.
1231  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1232                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1233                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1234                       "${dst} {${mask}} {z}, $src}"),
1235                       [(set MaskInfo.RC:$dst,
1236                         (vselect_mask MaskInfo.KRCWM:$mask,
1237                          (MaskInfo.VT
1238                           (bitconvert
1239                            (DestInfo.VT
1240                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1241                          MaskInfo.ImmAllZerosV))],
1242                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Merge-masked register form: $src0 is tied to $dst and is the value
  // selected where the mask is clear.
1243  let Constraints = "$src0 = $dst" in
1244  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1245                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1246                          SrcInfo.RC:$src),
1247                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1248                     "${dst} {${mask}}, $src}"),
1249                     [(set MaskInfo.RC:$dst,
1250                       (vselect_mask MaskInfo.KRCWM:$mask,
1251                        (MaskInfo.VT
1252                         (bitconvert
1253                          (DestInfo.VT
1254                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1255                        MaskInfo.RC:$src0))],
1256                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1257
  // Unmasked memory form. hasSideEffects/mayLoad are set explicitly on this
  // def only; the masked memory forms below carry no explicit flags.
1258  let hasSideEffects = 0, mayLoad = 1 in
1259  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1260                    (ins SrcInfo.ScalarMemOp:$src),
1261                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1262                    [(set MaskInfo.RC:$dst,
1263                      (MaskInfo.VT
1264                       (bitconvert
1265                        (DestInfo.VT
1266                         (UnmaskedBcastOp addr:$src)))))],
1267                    DestInfo.ExeDomain>, T8PD, EVEX,
1268                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1269
  // Zero-masked memory form.
1270  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1271                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1272                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1273                       "${dst} {${mask}} {z}, $src}"),
1274                       [(set MaskInfo.RC:$dst,
1275                         (vselect_mask MaskInfo.KRCWM:$mask,
1276                          (MaskInfo.VT
1277                           (bitconvert
1278                            (DestInfo.VT
1279                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1280                          MaskInfo.ImmAllZerosV))],
1281                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1282                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1283
  // Merge-masked memory form: $src0 tied to $dst; this is the only form that
  // receives the IsConvertibleToThreeAddress template argument.
1284  let Constraints = "$src0 = $dst",
1285      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1286  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1287                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1288                          SrcInfo.ScalarMemOp:$src),
1289                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1290                     "${dst} {${mask}}, $src}"),
1291                     [(set MaskInfo.RC:$dst,
1292                       (vselect_mask MaskInfo.KRCWM:$mask,
1293                        (MaskInfo.VT
1294                         (bitconvert
1295                          (DestInfo.VT
1296                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1297                        MaskInfo.RC:$src0))],
1298                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1299                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1300}
1301
1302// Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo passed for both the
// MaskInfo and DestInfo parameters; UnmaskedOp and UnmaskedBcastOp keep their
// defaults.
1303multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1304                               SchedWrite SchedRR, SchedWrite SchedRM,
1305                               X86VectorVTInfo DestInfo,
1306                               X86VectorVTInfo SrcInfo,
1307                               bit IsConvertibleToThreeAddress> :
1308  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1309                            DestInfo, DestInfo, SrcInfo,
1310                            IsConvertibleToThreeAddress>;
1311
// Floating-point scalar broadcast for the "sd" flavour: defines the Z
// (512-bit, HasAVX512) and Z256 (256-bit, HasVLX) variants only; no Z128
// variant is defined in this multiclass. Each variant combines the
// instruction forms from avx512_broadcast_rm with the FRC-source patterns
// from avx512_broadcast_scalar, always using _.info128 as the source info
// and passing 1 for IsConvertibleToThreeAddress.
1312multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1313                                  AVX512VLVectorVTInfo _> {
1314  let Predicates = [HasAVX512] in {
1315    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1316                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1317              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1318              EVEX_V512;
1319  }
1320
1321  let Predicates = [HasVLX] in {
1322    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1323                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1324                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1325                 EVEX_V256;
1326  }
1327}
1328
// Floating-point scalar broadcast for the "ss" flavour: same structure as
// avx512_fp_broadcast_sd, but additionally defines a Z128 variant under
// HasVLX. All variants use _.info128 as the source info and pass 1 for
// IsConvertibleToThreeAddress.
1329multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1330                                  AVX512VLVectorVTInfo _> {
1331  let Predicates = [HasAVX512] in {
1332    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1333                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1334              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1335              EVEX_V512;
1336  }
1337
1338  let Predicates = [HasVLX] in {
1339    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1340                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1341                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1342                 EVEX_V256;
    // NOTE(review): the 128-bit variant also uses the WriteFShuffle256 /
    // WriteFShuffle256Ld sched classes - confirm this is intentional.
1343    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1344                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1345                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1346                 EVEX_V128;
1347  }
1348}
// Instantiate the FP scalar broadcasts: vbroadcastss (opcode 0x18, f32
// infos) and vbroadcastsd (opcode 0x19, f64 infos, with VEX_W1X added).
1349defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1350                                       avx512vl_f32_info>;
1351defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1352                                       avx512vl_f64_info>, VEX_W1X;
1353
// Integer broadcast from a general-purpose register: defines the "rr"
// family through AVX512_maskable with mnemonic "vpbroadcast" # _.Suffix and
// pattern (_.VT (OpNode SrcRC:$src)). All three commutable flags are 0 and
// vselect is passed as the final argument.
1354multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1355                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1356                                    RegisterClass SrcRC> {
1357  // Fold with a mask even if it has multiple uses since it is cheap.
1358  let ExeDomain = _.ExeDomain in
1359  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1360                          (ins SrcRC:$src),
1361                          "vpbroadcast"#_.Suffix, "$src", "$src",
1362                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1363                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1364                          T8PD, EVEX, Sched<[SchedRR]>;
1365}
1366
// Byte/word broadcast from a general-purpose register. The instruction
// operands are always GR32:$src regardless of SrcRC, and the instructions
// are defined via AVX512_maskable_custom with three empty pattern lists.
// Selection is done by the separate Pats below, which widen the SrcRC value
// into an i32 by inserting it at Subreg into an IMPLICIT_DEF.
1367multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1368                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1369                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1370  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1371  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1372                         (outs _.RC:$dst), (ins GR32:$src),
1373                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1374                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1375                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1376                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1377
  // Unmasked broadcast -> Name#rr.
1378  def : Pat <(_.VT (OpNode SrcRC:$src)),
1379             (!cast<Instruction>(Name#rr)
1380              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1381
1382  // Fold with a mask even if it has multiple uses since it is cheap.
  // Merge-masked broadcast (pass-through $src0) -> Name#rrk.
1383  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1384             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1385              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1386
  // Zero-masked broadcast -> Name#rrkz.
1387  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1388             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1389              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1390}
1391
// Vector-length wrapper around avx512_int_broadcastbw_reg: Z (512-bit) is
// guarded by prd alone; Z256 and Z128 additionally require HasVLX. The Name
// string is extended with the matching Z/Z256/Z128 suffix so the inner Pats
// can look up the instruction records by name. Z128 uses WriteShuffle; the
// wider variants use WriteShuffle256.
1392multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1393                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1394                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1395  let Predicates = [prd] in
1396    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1397              OpNode, SrcRC, Subreg>, EVEX_V512;
1398  let Predicates = [prd, HasVLX] in {
1399    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1400              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1401    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1402              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1403  }
1404}
1405
// Vector-length wrapper around avx512_int_broadcast_reg: Z (512-bit) is
// guarded by prd alone; Z256 and Z128 additionally require HasVLX. Z128 uses
// WriteShuffle; the wider variants use WriteShuffle256.
1406multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1407                                       SDPatternOperator OpNode,
1408                                       RegisterClass SrcRC, Predicate prd> {
1409  let Predicates = [prd] in
1410    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1411                                      SrcRC>, EVEX_V512;
1412  let Predicates = [prd, HasVLX] in {
1413    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1414                                         SrcRC>, EVEX_V256;
1415    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1416                                         SrcRC>, EVEX_V128;
1417  }
1418}
1419
// GPR-source integer broadcasts. The byte (0x7A) and word (0x7B) forms go
// through the bw multiclass (GR8/GR16 widened via sub_8bit/sub_16bit) and are
// predicated on HasBWI. The dword and qword forms share opcode 0x7C, are
// predicated on HasAVX512, and the qword form adds VEX_W.
1420defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1421                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1422defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1423                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1424                       HasBWI>;
1425defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1426                                                 X86VBroadcast, GR32, HasAVX512>;
1427defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1428                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1429
// Vector-length wrapper for integer broadcasts from a vector register or
// scalar memory operand. Z is guarded by prd alone; Z256 and Z128 also
// require HasVLX. The source info is always _.info128. Z128 uses
// WriteShuffle / WriteShuffleXLd; the wider variants use WriteShuffle256 /
// WriteShuffle256Ld. IsConvertibleToThreeAddress is forwarded unchanged.
1430multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1431                                      AVX512VLVectorVTInfo _, Predicate prd,
1432                                      bit IsConvertibleToThreeAddress> {
1433  let Predicates = [prd] in {
1434    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1435                                   WriteShuffle256Ld, _.info512, _.info128,
1436                                   IsConvertibleToThreeAddress>,
1437                                  EVEX_V512;
1438  }
1439  let Predicates = [prd, HasVLX] in {
1440    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1441                                    WriteShuffle256Ld, _.info256, _.info128,
1442                                    IsConvertibleToThreeAddress>,
1443                                 EVEX_V256;
1444    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1445                                    WriteShuffleXLd, _.info128, _.info128,
1446                                    IsConvertibleToThreeAddress>,
1447                                 EVEX_V128;
1448  }
1449}
1450
// Vector/memory-source integer broadcasts. The byte and word forms are
// predicated on HasBWI and pass 0 for IsConvertibleToThreeAddress; the dword
// and qword forms are predicated on HasAVX512 and pass 1. The qword form
// adds VEX_W1X.
1451defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1452                                           avx512vl_i8_info, HasBWI, 0>;
1453defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1454                                           avx512vl_i16_info, HasBWI, 0>;
1455defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1456                                           avx512vl_i32_info, HasAVX512, 1>;
1457defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1458                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1459
// Subvector broadcast from memory: defines the "rm" family through
// AVX512_maskable. The memory operand is _Src.MemOp and the pattern is
// (_Dst.VT (OpNode addr:$src)). Uses the SchedWriteShuffle.YMM.Folded sched
// class for every instantiation.
1460multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1461                                      SDPatternOperator OpNode,
1462                                      X86VectorVTInfo _Dst,
1463                                      X86VectorVTInfo _Src> {
1464  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1465                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1466                           (_Dst.VT (OpNode addr:$src))>,
1467                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1468                           AVX5128IBase, EVEX;
1469}
1470
1471// This should be used for the AVX512DQ broadcast instructions. It disables
1472// the unmasked patterns so that we only use the DQ instructions when masking
1473// is requested.
// Implementation: uses AVX512_maskable_split, passing (null_frag) as the
// first pattern argument and the real (_Dst.VT (OpNode addr:$src)) pattern as
// the second. hasSideEffects = 0 and mayLoad = 1 are set explicitly.
1474multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1475                                         SDPatternOperator OpNode,
1476                                         X86VectorVTInfo _Dst,
1477                                         X86VectorVTInfo _Src> {
1478  let hasSideEffects = 0, mayLoad = 1 in
1479  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1480                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1481                           (null_frag),
1482                           (_Dst.VT (OpNode addr:$src))>,
1483                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1484                           AVX5128IBase, EVEX;
1485}
// 512-bit f16 broadcasts are selected to the integer word broadcast
// (VPBROADCASTWZ): broadcast-load -> rm, v8f16 register source -> rr, and an
// f16 scalar in FR16X -> rr after COPY_TO_REGCLASS to VR128X.
1486let Predicates = [HasBWI] in {
1487  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1488            (VPBROADCASTWZrm addr:$src)>;
1489
1490  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1491            (VPBROADCASTWZrr VR128X:$src)>;
1492  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1493            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1494}
// 128-bit and 256-bit f16 broadcasts, selected to VPBROADCASTWZ128 /
// VPBROADCASTWZ256 in the same three shapes as the 512-bit patterns:
// broadcast-load, v8f16 register source, and f16 scalar in FR16X.
1495let Predicates = [HasVLX, HasBWI] in {
1496  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1497            (VPBROADCASTWZ128rm addr:$src)>;
1498  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1499            (VPBROADCASTWZ256rm addr:$src)>;
1500
1501  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1502            (VPBROADCASTWZ128rr VR128X:$src)>;
1503  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1504            (VPBROADCASTWZ256rr VR128X:$src)>;
1505
1506  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1507            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1508  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1509            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1510}
1511
1512//===----------------------------------------------------------------------===//
1513// AVX-512 BROADCAST SUBVECTORS
1514//
1515
// 512-bit subvector broadcasts defined with full (masked and unmasked)
// patterns: 32x4 forms broadcast a 128-bit load (X86SubVBroadcastld128) and
// use EVEX_CD8<32, CD8VT4>; 64x4 forms broadcast a 256-bit load
// (X86SubVBroadcastld256), add VEX_W and use EVEX_CD8<64, CD8VT4>.
1516defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1517                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1518                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1519defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1520                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1521                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1522defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1523                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
1524                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1525defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1526                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
1527                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1528
// Extra selection patterns for 512-bit subvector broadcast loads under
// HasAVX512.
1529let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector broadcast loads: every 512-bit FP result type
// selects VBROADCASTF64X4rm and every integer result type selects
// VBROADCASTI64X4rm, regardless of element width.
1530def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1531          (VBROADCASTF64X4rm addr:$src)>;
1532def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1533          (VBROADCASTF64X4rm addr:$src)>;
1534def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1535          (VBROADCASTF64X4rm addr:$src)>;
1536def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1537          (VBROADCASTI64X4rm addr:$src)>;
1538def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1539          (VBROADCASTI64X4rm addr:$src)>;
1540def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1541          (VBROADCASTI64X4rm addr:$src)>;
1542def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1543          (VBROADCASTI64X4rm addr:$src)>;
1544
// Unmasked 128-bit subvector broadcast loads: FP result types select
// VBROADCASTF32X4rm and integer result types select VBROADCASTI32X4rm.
1545def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1546          (VBROADCASTF32X4rm addr:$src)>;
1547def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1548          (VBROADCASTF32X4rm addr:$src)>;
1549def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1550          (VBROADCASTF32X4rm addr:$src)>;
1551def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1552          (VBROADCASTI32X4rm addr:$src)>;
1553def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1554          (VBROADCASTI32X4rm addr:$src)>;
1555def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1556          (VBROADCASTI32X4rm addr:$src)>;
1557def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1558          (VBROADCASTI32X4rm addr:$src)>;
1559
1560// Patterns for selects of bitcasted operations.
// Each pair covers a broadcast load typed with the other element width and
// bitcast to the masked type: the all-zeros false operand selects the rmkz
// form, a register false operand ($src0) selects the rmk form.
1561def : Pat<(vselect_mask VK16WM:$mask,
1562                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1563                        (v16f32 immAllZerosV)),
1564          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1565def : Pat<(vselect_mask VK16WM:$mask,
1566                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1567                        VR512:$src0),
1568          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1569def : Pat<(vselect_mask VK16WM:$mask,
1570                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1571                        (v16i32 immAllZerosV)),
1572          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1573def : Pat<(vselect_mask VK16WM:$mask,
1574                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1575                        VR512:$src0),
1576          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1577
1578def : Pat<(vselect_mask VK8WM:$mask,
1579                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1580                        (v8f64 immAllZerosV)),
1581          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1582def : Pat<(vselect_mask VK8WM:$mask,
1583                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1584                        VR512:$src0),
1585          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1586def : Pat<(vselect_mask VK8WM:$mask,
1587                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1588                        (v8i64 immAllZerosV)),
1589          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1590def : Pat<(vselect_mask VK8WM:$mask,
1591                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1592                        VR512:$src0),
1593          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1594}
1595
// 256-bit subvector broadcasts (128-bit load broadcast into a 256-bit
// result) available with HasVLX: instruction definitions, unmasked patterns
// for every 256-bit result type, and masked patterns through a bitcast.
1596let Predicates = [HasVLX] in {
1597defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1598                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1599                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1600defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1601                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1602                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1603
// Unmasked: FP result types select the F32X4 form, integer result types the
// I32X4 form.
1604def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1605          (VBROADCASTF32X4Z256rm addr:$src)>;
1606def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1607          (VBROADCASTF32X4Z256rm addr:$src)>;
1608def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1609          (VBROADCASTF32X4Z256rm addr:$src)>;
1610def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1611          (VBROADCASTI32X4Z256rm addr:$src)>;
1612def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1613          (VBROADCASTI32X4Z256rm addr:$src)>;
1614def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1615          (VBROADCASTI32X4Z256rm addr:$src)>;
1616def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1617          (VBROADCASTI32X4Z256rm addr:$src)>;
1618
1619// Patterns for selects of bitcasted operations.
// The broadcast is typed with 64-bit elements and bitcast to 32-bit
// elements; all-zeros false operand -> rmkz, register false operand -> rmk.
1620def : Pat<(vselect_mask VK8WM:$mask,
1621                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1622                        (v8f32 immAllZerosV)),
1623          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1624def : Pat<(vselect_mask VK8WM:$mask,
1625                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1626                        VR256X:$src0),
1627          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1628def : Pat<(vselect_mask VK8WM:$mask,
1629                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1630                        (v8i32 immAllZerosV)),
1631          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1632def : Pat<(vselect_mask VK8WM:$mask,
1633                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1634                        VR256X:$src0),
1635          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1636}
1637
// 64x2 subvector broadcasts into a 256-bit result, requiring both HasVLX and
// HasDQI. They are defined with the _dq multiclass, so only the masked
// patterns are attached to the instructions.
// NOTE(review): the records are named ...Z128 although the destination info
// is 256-bit (v4i64x/v4f64x) and the encoding is EVEX_V256 - the suffix
// appears to describe the source width; confirm before renaming anything.
1638let Predicates = [HasVLX, HasDQI] in {
1639defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1640                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1641                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1642defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1643                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1644                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1645
1646// Patterns for selects of bitcasted operations.
// The broadcast is typed with 32-bit elements and bitcast to 64-bit
// elements; all-zeros false operand -> rmkz, register false operand -> rmk.
1647def : Pat<(vselect_mask VK4WM:$mask,
1648                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1649                        (v4f64 immAllZerosV)),
1650          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1651def : Pat<(vselect_mask VK4WM:$mask,
1652                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1653                        VR256X:$src0),
1654          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1655def : Pat<(vselect_mask VK4WM:$mask,
1656                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1657                        (v4i64 immAllZerosV)),
1658          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1659def : Pat<(vselect_mask VK4WM:$mask,
1660                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1661                        VR256X:$src0),
1662          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1663}
1664
// 512-bit subvector broadcasts that require HasDQI: 64x2 (128-bit load) and
// 32x8 (256-bit load). They are defined with the _dq multiclass, so only the
// masked patterns are attached to the instructions. The 64x2 forms add VEX_W
// and use EVEX_CD8<64, CD8VT2>; the 32x8 forms use EVEX_CD8<32, CD8VT8>.
1665let Predicates = [HasDQI] in {
1666defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1667                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
1668                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1669defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1670                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1671                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1672defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1673                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
1674                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1675defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1676                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1677                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1678
1679// Patterns for selects of bitcasted operations.
// 32x8: a 256-bit broadcast load typed with 64-bit elements, bitcast to
// 32-bit elements and masked with a 16-bit mask.
1680def : Pat<(vselect_mask VK16WM:$mask,
1681                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1682                        (v16f32 immAllZerosV)),
1683          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1684def : Pat<(vselect_mask VK16WM:$mask,
1685                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1686                        VR512:$src0),
1687          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1688def : Pat<(vselect_mask VK16WM:$mask,
1689                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1690                        (v16i32 immAllZerosV)),
1691          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1692def : Pat<(vselect_mask VK16WM:$mask,
1693                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1694                        VR512:$src0),
1695          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1696
// 64x2: a 128-bit broadcast load typed with 32-bit elements, bitcast to
// 64-bit elements and masked with an 8-bit mask.
1697def : Pat<(vselect_mask VK8WM:$mask,
1698                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1699                        (v8f64 immAllZerosV)),
1700          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1701def : Pat<(vselect_mask VK8WM:$mask,
1702                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1703                        VR512:$src0),
1704          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1705def : Pat<(vselect_mask VK8WM:$mask,
1706                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1707                        (v8i64 immAllZerosV)),
1708          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1709def : Pat<(vselect_mask VK8WM:$mask,
1710                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1711                        VR512:$src0),
1712          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1713}
1714
// Defines the 512-bit (Z) and 256-bit (Z256) forms of a 32x2 broadcast.
//   opc / OpcodeStr - opcode byte and mnemonic forwarded to every form.
//   _Dst            - VT info family for the destination vector.
//   _Src            - VT info family for the source; the 128-bit member
//                     (_Src.info128) is passed for both widths.
// Both forms pass 0, null_frag, null_frag as the trailing arguments of
// avx512_broadcast_rm_split (defined outside this section).
1715multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1716                                        AVX512VLVectorVTInfo _Dst,
1717                                        AVX512VLVectorVTInfo _Src> {
  // 512-bit form: needs only HasDQI.
1718  let Predicates = [HasDQI] in
1719    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1720                                          WriteShuffle256Ld, _Dst.info512,
1721                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1722                                          EVEX_V512;
  // 256-bit form: additionally needs HasVLX.
1723  let Predicates = [HasDQI, HasVLX] in
1724    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1725                                          WriteShuffle256Ld, _Dst.info256,
1726                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1727                                          EVEX_V256;
1728}
1729
// Extends avx512_common_broadcast_32x2 (which supplies Z and Z256) with a
// 128-bit Z128 form. The Z128 form is gated on HasDQI and HasVLX and uses the
// WriteShuffle / WriteShuffleXLd scheduling classes instead of the 256 ones.
1730multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1731                                         AVX512VLVectorVTInfo _Dst,
1732                                         AVX512VLVectorVTInfo _Src> :
1733  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1734
1735  let Predicates = [HasDQI, HasVLX] in
1736    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1737                                          WriteShuffleXLd, _Dst.info128,
1738                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1739                                          EVEX_V128;
1740}
1741
// The integer variant is built from the i32x2 multiclass and therefore gets
// Z, Z256 and Z128 forms; the FP variant is built from the base multiclass
// and gets only Z and Z256.
1742defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1743                                          avx512vl_i32_info, avx512vl_i64_info>;
1744defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1745                                          avx512vl_f32_info, avx512vl_f64_info>;
1746
1747//===----------------------------------------------------------------------===//
1748// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1749//---
// Defines the single register form `rr` of a mask-to-vector broadcast.
//   _   - VT info of the destination vector (register class _.RC, type _.VT).
//   KRC - mask register class of the source operand.
// The instruction is selected from (X86VBroadcastm KRC:$src) and scheduled as
// WriteShuffle.
1750multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1751                                  X86VectorVTInfo _, RegisterClass KRC> {
1752  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1753                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1754                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1755                  EVEX, Sched<[WriteShuffle]>;
1756}
1757
// Instantiates avx512_mask_broadcastm for all three vector widths of VTInfo.
// The 512-bit form (Z) requires HasCDI; the 256-bit and 128-bit forms
// (Z256, Z128) additionally require HasVLX. The same mask register class KRC
// is used as the source for every width.
1758multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1759                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1760  let Predicates = [HasCDI] in
1761    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1762  let Predicates = [HasCDI, HasVLX] in {
1763    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1764    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1765  }
1766}
1767
// Mask broadcasts: a VK16 source into i32-element vectors, and a VK8 source
// into i64-element vectors (the latter also sets VEX_W).
1768defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1769                                               avx512vl_i32_info, VK16>;
1770defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1771                                               avx512vl_i64_info, VK8>, VEX_W;
1772
1773//===----------------------------------------------------------------------===//
1774// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of VPERMI2.
//   sched - scheduling class; the memory form uses sched.Folded and
//           sched.ReadAfterFold.
//   _     - VT info for the data operands ($src2, $src3) and the result.
//   IdxVT - VT info for the operand that is the middle argument of X86VPermt2.
// $src1 is tied to $dst, and in the selection pattern $src1 is that middle
// (IdxVT) operand, so the instruction overwrites it with the result.
1775multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1776                         X86FoldableSchedWrite sched,
1777                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1778let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1779    hasSideEffects = 0 in {
1780  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1781          (ins _.RC:$src2, _.RC:$src3),
1782          OpcodeStr, "$src3, $src2", "$src2, $src3",
1783          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1784          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1785
  // Memory form: $src3 is loaded with _.LdFrag.
1786  let mayLoad = 1 in
1787  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1788            (ins _.RC:$src2, _.MemOp:$src3),
1789            OpcodeStr, "$src3, $src2", "$src2, $src3",
1790            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1791                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1792            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1793  }
1794}
1795
// Broadcast-memory form (rmb) of VPERMI2: $src3 is a scalar memory operand
// loaded with _.BroadcastLdFrag, the assembly string appends _.BroadcastStr
// to it, and the encoding sets EVEX_B. As in avx512_perm_i, $src1 is tied to
// $dst and is the IdxVT-typed middle operand of X86VPermt2.
1796multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1797                            X86FoldableSchedWrite sched,
1798                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1799  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1800      hasSideEffects = 0, mayLoad = 1 in
1801  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1802              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1803              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1804              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1805              (_.VT (X86VPermt2 _.RC:$src2,
1806               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1807              AVX5128IBase, EVEX_4V, EVEX_B,
1808              Sched<[sched.Folded, sched.ReadAfterFold]>;
1809}
1810
// Instantiates VPERMI2 (rr, rm and broadcast rmb forms) for 512, 256 and 128
// bits. The 512-bit records take the bare NAME and carry no predicate from
// this multiclass; the NAME#128 and NAME#256 records require HasVLX.
//   VTInfo      - VT info family for the data operands.
//   ShuffleMask - VT info family passed as IdxVT to the per-width multiclasses.
1811multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1812                               X86FoldableSchedWrite sched,
1813                               AVX512VLVectorVTInfo VTInfo,
1814                               AVX512VLVectorVTInfo ShuffleMask> {
1815  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1816                           ShuffleMask.info512>,
1817            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1818                             ShuffleMask.info512>, EVEX_V512;
1819  let Predicates = [HasVLX] in {
1820  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1821                               ShuffleMask.info128>,
1822                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1823                                  ShuffleMask.info128>, EVEX_V128;
1824  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1825                               ShuffleMask.info256>,
1826                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1827                                  ShuffleMask.info256>, EVEX_V256;
1828  }
1829}
1830
// Like avx512_perm_i_sizes, but every width is gated on the caller-supplied
// predicate Prd (plus HasVLX for 128/256 bits), and only avx512_perm_i is
// instantiated, so there are no broadcast (rmb) forms.
1831multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1832                                  X86FoldableSchedWrite sched,
1833                                  AVX512VLVectorVTInfo VTInfo,
1834                                  AVX512VLVectorVTInfo Idx,
1835                                  Predicate Prd> {
1836  let Predicates = [Prd] in
1837  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1838                           Idx.info512>, EVEX_V512;
1839  let Predicates = [Prd, HasVLX] in {
1840  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1841                               Idx.info128>, EVEX_V128;
1842  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1843                               Idx.info256>,  EVEX_V256;
1844  }
1845}
1846
// VPERMI2 instantiations. The D/Q and PS/PD variants use avx512_perm_i_sizes
// (with broadcast forms); W and B use avx512_perm_i_sizes_bw gated on HasBWI
// and HasVBMI respectively. The FP variants pair FP data types with integer
// types of the same element width for the IdxVT operand.
1847defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1848                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1849defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1850                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1851defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1852                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1853                  VEX_W, EVEX_CD8<16, CD8VF>;
1854defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1855                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1856                  EVEX_CD8<8, CD8VF>;
1857defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1858                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1859defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1860                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1861
1862// Extra patterns to deal with extra bitcasts due to passthru and index being
1863// different types on the fp versions.
//   InstrStr - instruction name prefix; "rrk", "rmk" or "rmbk" is appended.
//   _        - VT info of the data operands and result.
//   IdxVT    - VT info of the middle X86VPermt2 operand.
//   CastVT   - type through which $src1 is bitconverted in the matched DAG.
// In every pattern the same $src1 register appears twice: bitconverted via
// CastVT to IdxVT as the middle X86VPermt2 operand, and bitconverted via
// CastVT to _.VT as the vselect_mask passthru. Each maps to the masked
// instruction form with $src1 as the first operand.
1864multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1865                                  X86VectorVTInfo IdxVT,
1866                                  X86VectorVTInfo CastVT> {
  // Register third operand -> rrk.
1867  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1868                                (X86VPermt2 (_.VT _.RC:$src2),
1869                                            (IdxVT.VT (bitconvert
1870                                                       (CastVT.VT _.RC:$src1))),
1871                                            _.RC:$src3),
1872                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1873            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1874                                                _.RC:$src2, _.RC:$src3)>;
  // Full-vector load as third operand -> rmk.
1875  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1876                                (X86VPermt2 _.RC:$src2,
1877                                            (IdxVT.VT (bitconvert
1878                                                       (CastVT.VT _.RC:$src1))),
1879                                            (_.LdFrag addr:$src3)),
1880                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1881            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1882                                                _.RC:$src2, addr:$src3)>;
  // Broadcast load as third operand -> rmbk.
1883  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1884                                 (X86VPermt2 _.RC:$src2,
1885                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1886                                             (_.BroadcastLdFrag addr:$src3)),
1887                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1888            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1889                                                 _.RC:$src2, addr:$src3)>;
1890}
1891
1892// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the PS variants are covered, each with a vXi64 cast type of the same
// total vector width.
1893defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1894defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1895defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1896
1897// VPERMT2
// Register (rr) and full-vector memory (rm) forms of VPERMT2.
//   _     - VT info of the data operands ($src1, $src3) and the result.
//   IdxVT - VT info of $src2, the middle operand of X86VPermt2.
// $src1 is tied to $dst; here it is the first (data) operand of X86VPermt2,
// whereas in avx512_perm_i the tied operand is the IdxVT-typed middle operand.
// Unlike avx512_perm_i, this multiclass sets neither hasSideEffects nor
// mayLoad explicitly.
1898multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1899                         X86FoldableSchedWrite sched,
1900                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1901let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1902  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1903          (ins IdxVT.RC:$src2, _.RC:$src3),
1904          OpcodeStr, "$src3, $src2", "$src2, $src3",
1905          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1906          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1907
1908  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1909            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1910            OpcodeStr, "$src3, $src2", "$src2, $src3",
1911            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1912                   (_.LdFrag addr:$src3))), 1>,
1913            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1914  }
1915}
// Broadcast-memory form (rmb) of VPERMT2: $src3 is a scalar memory operand
// loaded with _.BroadcastLdFrag, printed with _.BroadcastStr, and encoded
// with EVEX_B. $src1 (first X86VPermt2 operand) is tied to $dst.
1916multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1917                            X86FoldableSchedWrite sched,
1918                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1919  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1920  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1921              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1922              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1923              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1924              (_.VT (X86VPermt2 _.RC:$src1,
1925               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1926              AVX5128IBase, EVEX_4V, EVEX_B,
1927              Sched<[sched.Folded, sched.ReadAfterFold]>;
1928}
1929
// Instantiates VPERMT2 (rr, rm and broadcast rmb forms) for 512, 256 and 128
// bits. The 512-bit records take the bare NAME and carry no predicate from
// this multiclass; the NAME#128 and NAME#256 records require HasVLX.
//   VTInfo      - VT info family for the data operands.
//   ShuffleMask - VT info family passed as IdxVT to the per-width multiclasses.
1930multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1931                               X86FoldableSchedWrite sched,
1932                               AVX512VLVectorVTInfo VTInfo,
1933                               AVX512VLVectorVTInfo ShuffleMask> {
1934  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1935                              ShuffleMask.info512>,
1936            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1937                              ShuffleMask.info512>, EVEX_V512;
1938  let Predicates = [HasVLX] in {
1939  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1940                              ShuffleMask.info128>,
1941                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1942                              ShuffleMask.info128>, EVEX_V128;
1943  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1944                              ShuffleMask.info256>,
1945                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1946                              ShuffleMask.info256>, EVEX_V256;
1947  }
1948}
1949
// Like avx512_perm_t_sizes, but every width is gated on the caller-supplied
// predicate Prd (plus HasVLX for 128/256 bits), and only avx512_perm_t is
// instantiated, so there are no broadcast (rmb) forms.
1950multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1951                                  X86FoldableSchedWrite sched,
1952                                  AVX512VLVectorVTInfo VTInfo,
1953                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1954  let Predicates = [Prd] in
1955  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1956                           Idx.info512>, EVEX_V512;
1957  let Predicates = [Prd, HasVLX] in {
1958  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1959                               Idx.info128>, EVEX_V128;
1960  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1961                               Idx.info256>, EVEX_V256;
1962  }
1963}
1964
// VPERMT2 instantiations, parallel to the VPERMI2 ones: D/Q/PS/PD use
// avx512_perm_t_sizes (with broadcast forms); W and B use
// avx512_perm_t_sizes_bw gated on HasBWI and HasVBMI respectively.
1965defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1966                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1967defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1968                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1969defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1970                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1971                  VEX_W, EVEX_CD8<16, CD8VF>;
1972defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1973                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1974                  EVEX_CD8<8, CD8VF>;
1975defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1976                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1977defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1978                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1979
1980//===----------------------------------------------------------------------===//
1981// AVX-512 - BLEND using mask
1982//
1983
// Register and full-vector memory forms of a mask blend instruction:
//   rr / rm     - no mask operand.
//   rrk / rmk   - take a _.KRCWM:$mask operand (EVEX_K).
//   rrkz / rmkz - take a mask and print `{z}` (EVEX_KZ); these are also
//                 marked NotMemoryFoldable.
// Every record has an empty selection pattern ([]), so instruction selection
// for these must come from patterns defined elsewhere.
// NOTE(review): the multiclass name resembles a scheduling-class name (cf.
// SchedWriteFVarBlend used by the instantiations) - confirm whether a more
// descriptive name is wanted.
1984multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1985                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1986  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1987  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1988             (ins _.RC:$src1, _.RC:$src2),
1989             !strconcat(OpcodeStr,
1990             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1991             EVEX_4V, Sched<[sched]>;
1992  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1993             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1994             !strconcat(OpcodeStr,
1995             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1996             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1997  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1998             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1999             !strconcat(OpcodeStr,
2000             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2001             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  // Memory forms: $src2 is a full-vector memory operand; the disp8 scaling
  // is derived from the element size (EVEX_CD8<_.EltSize, CD8VF>).
2002  let mayLoad = 1 in {
2003  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2004             (ins _.RC:$src1, _.MemOp:$src2),
2005             !strconcat(OpcodeStr,
2006             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
2007             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
2008             Sched<[sched.Folded, sched.ReadAfterFold]>;
2009  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2010             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2011             !strconcat(OpcodeStr,
2012             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2013             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2014             Sched<[sched.Folded, sched.ReadAfterFold]>;
2015  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2016             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2017             !strconcat(OpcodeStr,
2018             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2019             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2020             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2021  }
2022  }
2023}
// Broadcast-memory forms of a mask blend instruction. $src2 is a scalar
// memory operand printed with _.BroadcastStr, and every record sets EVEX_B:
//   rmbk  - with mask (EVEX_K).
//   rmbkz - with mask and `{z}` (EVEX_KZ); marked NotMemoryFoldable.
//   rmb   - no mask operand.
// All records have empty selection patterns.
2024multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
2025                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2026  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
2027  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2028      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2029       !strconcat(OpcodeStr,
2030            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2031            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2032      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2033      Sched<[sched.Folded, sched.ReadAfterFold]>;
2034
2035  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2036      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2037       !strconcat(OpcodeStr,
2038            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2039            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2040      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2041      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2042
2043  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2044      (ins _.RC:$src1, _.ScalarMemOp:$src2),
2045       !strconcat(OpcodeStr,
2046            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2047            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2048      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2049      Sched<[sched.Folded, sched.ReadAfterFold]>;
2050  }
2051}
2052
// Instantiates both the plain and the broadcast-memory blend forms for 512,
// 256 and 128 bits, picking sched.ZMM / sched.YMM / sched.XMM per width.
// The 512-bit form carries no predicate from this multiclass; the 256-bit and
// 128-bit forms require HasVLX.
2053multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2054                        AVX512VLVectorVTInfo VTInfo> {
2055  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2056           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2057                                 EVEX_V512;
2058
2059  let Predicates = [HasVLX] in {
2060    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2061                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2062                                      EVEX_V256;
2063    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2064                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2065                                      EVEX_V128;
2066  }
2067}
2068
// Instantiates only the plain (non-broadcast) blend forms for 512, 256 and
// 128 bits. Every width requires HasBWI; the 256-bit and 128-bit forms
// additionally require HasVLX.
2069multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2070                        AVX512VLVectorVTInfo VTInfo> {
2071  let Predicates = [HasBWI] in
2072    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2073                               EVEX_V512;
2074
2075  let Predicates = [HasBWI, HasVLX] in {
2076    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2077                                  EVEX_V256;
2078    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2079                                  EVEX_V128;
2080  }
2081}
2082
// Blend instantiations. The 32/64-bit element variants (PS, PD, D, Q) use
// blendmask_dq and so include broadcast forms; the byte/word variants (B, W)
// use blendmask_bw. FP variants use SchedWriteFVarBlend, integer variants
// SchedWriteVarBlend.
2083defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2084                              avx512vl_f32_info>;
2085defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2086                              avx512vl_f64_info>, VEX_W;
2087defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2088                              avx512vl_i32_info>;
2089defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2090                              avx512vl_i64_info>, VEX_W;
2091defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2092                              avx512vl_i8_info>;
2093defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2094                              avx512vl_i16_info>, VEX_W;
2095
2096//===----------------------------------------------------------------------===//
2097// Compare Instructions
2098//===----------------------------------------------------------------------===//
2099
2100// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2101
// Scalar FP compare into a mask register (opcode 0xC2, mnemonic
// "vcmp" # _.Suffix, with a u8imm condition code $cc).
//   _            - scalar VT info.
//   OpNode       - node matched by the normal forms.
//   OpNodeSAE    - node matched by the {sae} form.
//   OpNode_su /
//   OpNodeSAE_su - second fragments handed to AVX512_maskable_cmp alongside
//                  the plain nodes.
//   sched        - scheduling class; memory forms use sched.Folded and
//                  sched.ReadAfterFold.
// Records produced:
//   rr_Int / rm_Int - operate on the vector type (_.VT in _.RC); SIMD_EXC.
//   rrb_Int         - {sae} register form with EVEX_B.
//   rr / rm         - isCodeGenOnly forms on the scalar class _.FRC.
2102multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2103                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2104                             X86FoldableSchedWrite sched> {
2105  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2106                      (outs _.KRC:$dst),
2107                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2108                      "vcmp"#_.Suffix,
2109                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2110                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2111                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2112                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2113  let mayLoad = 1 in
2114  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2115                    (outs _.KRC:$dst),
2116                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2117                    "vcmp"#_.Suffix,
2118                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2119                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2120                        timm:$cc),
2121                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2122                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2123                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2124
  // The {sae} form lists MXCSR in Uses explicitly and, unlike the other
  // records here, is not tagged SIMD_EXC.
2125  let Uses = [MXCSR] in
2126  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2127                     (outs _.KRC:$dst),
2128                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2129                     "vcmp"#_.Suffix,
2130                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2131                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2132                                timm:$cc),
2133                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2134                                   timm:$cc)>,
2135                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2136
  // Codegen-only forms on _.FRC scalars; only the register form is marked
  // commutable.
2137  let isCodeGenOnly = 1 in {
2138    let isCommutable = 1 in
2139    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2140                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2141                !strconcat("vcmp", _.Suffix,
2142                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2143                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2144                                          _.FRC:$src2,
2145                                          timm:$cc))]>,
2146                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2147    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2148              (outs _.KRC:$dst),
2149              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2150              !strconcat("vcmp", _.Suffix,
2151                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2152              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2153                                        (_.ScalarLdFrag addr:$src2),
2154                                        timm:$cc))]>,
2155              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2156              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2157  }
2158}
2159
// Single-use variants of the scalar compare nodes: each fragment matches the
// underlying node only when the matched node has exactly one use.
2160def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2161                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
2162  return N->hasOneUse();
2163}]>;
2164def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2165                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2166  return N->hasOneUse();
2167}]>;
2168
// Scalar compare instantiations. The f32 and f64 variants require HasAVX512
// and use the SSEPackedSingle / SSEPackedDouble domains respectively; the f16
// variant requires HasFP16.
// NOTE(review): VCMPSHZ is placed in the SSEPackedSingle domain - presumably
// because there is no dedicated half-precision domain; confirm.
2169let Predicates = [HasAVX512] in {
2170  let ExeDomain = SSEPackedSingle in
2171  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2172                                   X86cmpms_su, X86cmpmsSAE_su,
2173                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2174  let ExeDomain = SSEPackedDouble in
2175  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2176                                   X86cmpms_su, X86cmpmsSAE_su,
2177                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2178}
2179let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2180  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2181                                   X86cmpms_su, X86cmpmsSAE_su,
2182                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2183
// Packed integer compare producing a mask register (_.KRC):
//   rr / rm   - register and full-vector memory forms.
//   rrk / rmk - the same with a _.KRCWM:$mask operand (EVEX_K).
// IsCommutable is applied to the register forms only. All records have empty
// selection patterns and hasSideEffects = 0.
2184multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2185                              X86FoldableSchedWrite sched,
2186                              X86VectorVTInfo _, bit IsCommutable> {
2187  let isCommutable = IsCommutable, hasSideEffects = 0 in
2188  def rr : AVX512BI<opc, MRMSrcReg,
2189             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2190             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2191             []>, EVEX_4V, Sched<[sched]>;
2192  let mayLoad = 1, hasSideEffects = 0 in
2193  def rm : AVX512BI<opc, MRMSrcMem,
2194             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2195             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2196             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2197  let isCommutable = IsCommutable, hasSideEffects = 0 in
2198  def rrk : AVX512BI<opc, MRMSrcReg,
2199              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2200              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2201                          "$dst {${mask}}, $src1, $src2}"),
2202              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2203  let mayLoad = 1, hasSideEffects = 0 in
2204  def rmk : AVX512BI<opc, MRMSrcMem,
2205              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2206              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2207                          "$dst {${mask}}, $src1, $src2}"),
2208              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2209}
2210
// Extends avx512_icmp_packed (rr, rm, rrk, rmk) with broadcast-memory forms:
//   rmb  - $src2 is a scalar memory operand printed with _.BroadcastStr.
//   rmbk - the same with a _.KRCWM:$mask operand (EVEX_K).
// Both set EVEX_B and have empty selection patterns.
2211multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2212                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2213                                  bit IsCommutable> :
2214           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2215  let mayLoad = 1, hasSideEffects = 0 in {
2216  def rmb : AVX512BI<opc, MRMSrcMem,
2217              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2218              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2219                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2220              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2221  def rmbk : AVX512BI<opc, MRMSrcMem,
2222               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2223                                       _.ScalarMemOp:$src2),
2224               !strconcat(OpcodeStr,
2225                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2226                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2227               []>, EVEX_4V, EVEX_K, EVEX_B,
2228               Sched<[sched.Folded, sched.ReadAfterFold]>;
2229  }
2230}
2231
// Instantiates avx512_icmp_packed (no broadcast forms) for 512, 256 and 128
// bits. The 512-bit form requires `prd`; the 256-bit and 128-bit forms
// additionally require HasVLX. IsCommutable defaults to 0 and is forwarded
// unchanged to every width.
2232multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2233                                 X86SchedWriteWidths sched,
2234                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2235                                 bit IsCommutable = 0> {
2236  let Predicates = [prd] in
2237  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2238                              VTInfo.info512, IsCommutable>, EVEX_V512;
2239
2240  let Predicates = [prd, HasVLX] in {
2241    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2242                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2243    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2244                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2245  }
2246}
2247
// Same width/predicate layout as avx512_icmp_packed_vl, but instantiates
// avx512_icmp_packed_rmb so each width also gets the broadcast-memory forms.
2248multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2249                                     X86SchedWriteWidths sched,
2250                                     AVX512VLVectorVTInfo VTInfo,
2251                                     Predicate prd, bit IsCommutable = 0> {
2252  let Predicates = [prd] in
2253  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2254                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2255
2256  let Predicates = [prd, HasVLX] in {
2257    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2258                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2259    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2260                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2261  }
2262}
2263
2264// This node treats ISD::SETCC as commutable (SDNPCommutative) to help match
2265// loads in both operands for PCMPEQ.
2266def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Matches a setcc of $src1 and $src2 whose condition code is SETGT.
2267def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2268                         (setcc node:$src1, node:$src2, SETGT)>;
2269
2270// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2271// increase the pattern complexity the way an immediate would.
2272let AddedComplexity = 2 in {
2273// FIXME: Is there a better scheduler class for VPCMP?
// Byte and word compares are instantiated through avx512_icmp_packed_vl under
// HasBWI; dword and qword compares go through avx512_icmp_packed_rmb_vl under
// HasAVX512. The trailing `1` on the EQ forms is the IsCommutable argument;
// the GT forms leave it at its default of 0.
2274defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2275                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2276                EVEX_CD8<8, CD8VF>, VEX_WIG;
2277
2278defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2279                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2280                EVEX_CD8<16, CD8VF>, VEX_WIG;
2281
2282defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2283                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2284                EVEX_CD8<32, CD8VF>;
2285
2286defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2287                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2288                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2289
2290defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2291                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2292                EVEX_CD8<8, CD8VF>, VEX_WIG;
2293
2294defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2295                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2296                EVEX_CD8<16, CD8VF>, VEX_WIG;
2297
2298defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2299                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2300                EVEX_CD8<32, CD8VF>;
2301
2302defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2303                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2304                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2305}
2306
// Transform for a setcc node: reads the condition code from operand 2 and
// returns the corresponding VPCMP immediate as an i8 constant.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  const uint8_t VPCMPImm = X86::getVPCMPImmForCond(CondNode->get());
  return getI8Imm(VPCMPImm, SDLoc(N));
}]>;
2312
// Swapped-operand counterpart of X86pcmpm_imm: computes the VPCMP immediate
// for the setcc condition code (operand 2), then passes it through
// X86::getSwappedVPCMPImm before returning it as an i8 constant.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  const uint8_t Unswapped = X86::getVPCMPImmForCond(CondNode->get());
  const uint8_t Swapped = X86::getSwappedVPCMPImm(Unswapped);
  return getI8Imm(Swapped, SDLoc(N));
}]>;
2320
// Defines the "vpcmp<Suffix>" integer compares that take the condition as an
// immediate ($cc) and write a mask register (_.KRC):
//   rri  / rmi  - register / full-vector-load second source, unmasked.
//   rrik / rmik - the same with a write-mask; their patterns match
//                 (and $mask, compare).
// Frag is used for the unmasked patterns and Frag_su for the masked ones.
// Name is the record-name prefix used by the !cast lookups in the trailing
// patterns. Those patterns cover a load appearing as the FIRST setcc operand:
// they select rmi/rmik with the register as $src1 and use
// X86pcmpm_imm_commute to produce the immediate.
2321multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2322                          PatFrag Frag_su,
2323                          X86FoldableSchedWrite sched,
2324                          X86VectorVTInfo _, string Name> {
2325  let isCommutable = 1 in
2326  def rri : AVX512AIi8<opc, MRMSrcReg,
2327             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2328             !strconcat("vpcmp", Suffix,
2329                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2330             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2331                                                (_.VT _.RC:$src2),
2332                                                cond)))]>,
2333             EVEX_4V, Sched<[sched]>;
2334  def rmi : AVX512AIi8<opc, MRMSrcMem,
2335             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2336             !strconcat("vpcmp", Suffix,
2337                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2338             [(set _.KRC:$dst, (_.KVT
2339                                (Frag:$cc
2340                                 (_.VT _.RC:$src1),
2341                                 (_.VT (_.LdFrag addr:$src2)),
2342                                 cond)))]>,
2343             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2344  let isCommutable = 1 in
2345  def rrik : AVX512AIi8<opc, MRMSrcReg,
2346              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2347                                      u8imm:$cc),
2348              !strconcat("vpcmp", Suffix,
2349                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2350                         "$dst {${mask}}, $src1, $src2, $cc}"),
2351              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2352                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2353                                                         (_.VT _.RC:$src2),
2354                                                         cond))))]>,
2355              EVEX_4V, EVEX_K, Sched<[sched]>;
2356  def rmik : AVX512AIi8<opc, MRMSrcMem,
2357              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2358                                    u8imm:$cc),
2359              !strconcat("vpcmp", Suffix,
2360                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2361                         "$dst {${mask}}, $src1, $src2, $cc}"),
2362              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2363                                     (_.KVT
2364                                      (Frag_su:$cc
2365                                       (_.VT _.RC:$src1),
2366                                       (_.VT (_.LdFrag addr:$src2)),
2367                                       cond))))]>,
2368              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2369
  // Load in the first operand, unmasked: select rmi with a commuted immediate.
2370  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2371                             (_.VT _.RC:$src1), cond)),
2372            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2373             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2374
  // Load in the first operand, masked: select rmik with a commuted immediate.
2375  def : Pat<(and _.KRCWM:$mask,
2376                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2377                                     (_.VT _.RC:$src1), cond))),
2378            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2379             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2380             (X86pcmpm_imm_commute $cc))>;
2381}
2382
// Extends avx512_icmp_cc (inherited, so rri/rmi/rrik/rmik are also defined)
// with the broadcast-memory forms:
//   rmib  - second source is a broadcast scalar load (_.BroadcastLdFrag),
//           encoded with EVEX_B.
//   rmibk - the same with a write-mask.
// The trailing patterns handle a broadcast load in the FIRST setcc operand by
// selecting rmib/rmibk with an immediate from X86pcmpm_imm_commute.
2383multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2384                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2385                              X86VectorVTInfo _, string Name> :
2386           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2387  def rmib : AVX512AIi8<opc, MRMSrcMem,
2388             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2389                                     u8imm:$cc),
2390             !strconcat("vpcmp", Suffix,
2391                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2392                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2393             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2394                                       (_.VT _.RC:$src1),
2395                                       (_.BroadcastLdFrag addr:$src2),
2396                                       cond)))]>,
2397             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2398  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2399              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2400                                       _.ScalarMemOp:$src2, u8imm:$cc),
2401              !strconcat("vpcmp", Suffix,
2402                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2403                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2404              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2405                                     (_.KVT (Frag_su:$cc
2406                                             (_.VT _.RC:$src1),
2407                                             (_.BroadcastLdFrag addr:$src2),
2408                                             cond))))]>,
2409              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2410
  // Broadcast load in the first operand, unmasked.
2411  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2412                    (_.VT _.RC:$src1), cond)),
2413            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2414             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2415
  // Broadcast load in the first operand, masked.
2416  def : Pat<(and _.KRCWM:$mask,
2417                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2418                                     (_.VT _.RC:$src1), cond))),
2419            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2420             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2421             (X86pcmpm_imm_commute $cc))>;
2422}
2423
// Instantiates avx512_icmp_cc for the 512-bit type under `prd`, and for the
// 256- and 128-bit types under `prd` plus HasVLX. NAME (the prefix of the
// enclosing defm) is passed down as the Name argument used for !cast lookups.
2424multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2425                             PatFrag Frag_su, X86SchedWriteWidths sched,
2426                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2427  let Predicates = [prd] in
2428  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2429                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2430
2431  let Predicates = [prd, HasVLX] in {
2432    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2433                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2434    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2435                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2436  }
2437}
2438
// Same width fan-out as avx512_icmp_cc_vl, but instantiates
// avx512_icmp_cc_rmb so the broadcast forms (rmib/rmibk) are defined as well.
// Z is guarded by `prd`; Z256/Z128 by `prd` plus HasVLX.
2439multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2440                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2441                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2442  let Predicates = [prd] in
2443  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2444                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2445
2446  let Predicates = [prd, HasVLX] in {
2447    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2448                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2449    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2450                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2451  }
2452}
2453
// Matches a setcc whose condition code (operand 2) is NOT an unsigned integer
// comparison. X86pcmpm_imm is attached as the operand transform.
def X86pcmpm
    : PatFrag<(ops node:$src1, node:$src2, node:$cc),
              (setcc node:$src1, node:$src2, node:$cc),
              [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  if (ISD::isUnsignedIntSetCC(CondNode->get()))
    return false;
  return true;
}],
              X86pcmpm_imm>;
2459
// Single-use variant of the signed compare fragment: matches a setcc that has
// exactly one use and whose condition code is NOT an unsigned integer
// comparison. X86pcmpm_imm is attached as the operand transform.
def X86pcmpm_su
    : PatFrag<(ops node:$src1, node:$src2, node:$cc),
              (setcc node:$src1, node:$src2, node:$cc),
              [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  if (!N->hasOneUse())
    return false;
  return !ISD::isUnsignedIntSetCC(CondNode->get());
}],
              X86pcmpm_imm>;
2465
// Matches a setcc whose condition code (operand 2) IS an unsigned integer
// comparison. X86pcmpm_imm is attached as the operand transform.
def X86pcmpum
    : PatFrag<(ops node:$src1, node:$src2, node:$cc),
              (setcc node:$src1, node:$src2, node:$cc),
              [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  if (ISD::isUnsignedIntSetCC(CondNode->get()))
    return true;
  return false;
}],
              X86pcmpm_imm>;
2471
// Single-use variant of the unsigned compare fragment: matches a setcc that
// has exactly one use and whose condition code IS an unsigned integer
// comparison. X86pcmpm_imm is attached as the operand transform.
def X86pcmpum_su
    : PatFrag<(ops node:$src1, node:$src2, node:$cc),
              (setcc node:$src1, node:$src2, node:$cc),
              [{
  const auto *CondNode = cast<CondCodeSDNode>(N->getOperand(2));
  if (!N->hasOneUse())
    return false;
  return ISD::isUnsignedIntSetCC(CondNode->get());
}],
              X86pcmpm_imm>;
2477
2478// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Signed compares (VPCMP*) use the X86pcmpm/X86pcmpm_su fragments; unsigned
// compares (VPCMPU*) use X86pcmpum/X86pcmpum_su. Byte and word element types
// are instantiated through avx512_icmp_cc_vl under HasBWI; dword and qword
// element types go through avx512_icmp_cc_rmb_vl under HasAVX512, which also
// defines the broadcast forms.
2479defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2480                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2481                                EVEX_CD8<8, CD8VF>;
2482defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2483                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2484                                 EVEX_CD8<8, CD8VF>;
2485
2486defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2487                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2488                                VEX_W, EVEX_CD8<16, CD8VF>;
2489defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2490                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2491                                 VEX_W, EVEX_CD8<16, CD8VF>;
2492
2493defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2494                                    SchedWriteVecALU, avx512vl_i32_info,
2495                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2496defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2497                                     SchedWriteVecALU, avx512vl_i32_info,
2498                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2499
2500defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2501                                    SchedWriteVecALU, avx512vl_i64_info,
2502                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2503defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2504                                     SchedWriteVecALU, avx512vl_i64_info,
2505                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2506
// Single-use variant of X86cmpm: matches only when the compare node has
// exactly one use.
def X86cmpm_su
    : PatFrag<(ops node:$src1, node:$src2, node:$cc),
              (X86cmpm node:$src1, node:$src2, node:$cc),
              [{ return N->hasOneUse(); }]>;
2511
// Transform for a target-immediate compare predicate: keeps the low five bits
// of the immediate, maps them through X86::getSwappedVCMPImm and returns the
// result as an i8 constant. Used when the compare operands are swapped.
def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  const uint64_t Predicate = N->getZExtValue() & 0x1f;
  const uint8_t Swapped = X86::getSwappedVCMPImm(Predicate);
  return getI8Imm(Swapped, SDLoc(N));
}]>;
2516
// Defines the packed floating-point "vcmp<Suffix>" compares (opcode 0xC2) that
// write a mask register, plus the extra selection patterns for them:
//   rri  - register second source (marked commutable via the trailing `1`).
//   rmi  - full-vector load second source.
//   rmbi - broadcast scalar load second source (EVEX_B).
// All three are built with AVX512_maskable_cmp, giving X86any_cmpm as the
// unmasked pattern and the single-use X86cmpm_su as the masked pattern; they
// use MXCSR and are marked mayRaiseFPException.
// NOTE(review): the masked records referenced below (rrik/rmik/rmbik) are
// presumably produced by AVX512_maskable_cmp, which is defined outside this
// section.
// Name is the record-name prefix used by the !cast lookups. Patterns that see
// the load/broadcast as the FIRST compare operand select the same memory
// instructions with the immediate rewritten by X86cmpm_imm_commute.
2517multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2518                              string Name> {
2519let Uses = [MXCSR], mayRaiseFPException = 1 in {
2520  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2521                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2522                   "vcmp"#_.Suffix,
2523                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2524                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2525                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2526                   1>, Sched<[sched]>;
2527
2528  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2529                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2530                "vcmp"#_.Suffix,
2531                "$cc, $src2, $src1", "$src1, $src2, $cc",
2532                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2533                             timm:$cc),
2534                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2535                            timm:$cc)>,
2536                Sched<[sched.Folded, sched.ReadAfterFold]>;
2537
2538  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2539                (outs _.KRC:$dst),
2540                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2541                "vcmp"#_.Suffix,
2542                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2543                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2544                (X86any_cmpm (_.VT _.RC:$src1),
2545                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2546                             timm:$cc),
2547                (X86cmpm_su (_.VT _.RC:$src1),
2548                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2549                            timm:$cc)>,
2550                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2551  }
2552
2553  // Patterns for selecting with loads in other operand.
2554  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2555                         timm:$cc),
2556            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2557                                                      (X86cmpm_imm_commute timm:$cc))>;
2558
2559  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2560                                            (_.VT _.RC:$src1),
2561                                            timm:$cc)),
2562            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2563                                                       _.RC:$src1, addr:$src2,
2564                                                       (X86cmpm_imm_commute timm:$cc))>;
2565
2566  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2567                         (_.VT _.RC:$src1), timm:$cc),
2568            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2569                                                       (X86cmpm_imm_commute timm:$cc))>;
2570
2571  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2572                                            (_.VT _.RC:$src1),
2573                                            timm:$cc)),
2574            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2575                                                        _.RC:$src1, addr:$src2,
2576                                                        (X86cmpm_imm_commute timm:$cc))>;
2577
2578  // Patterns for mask intrinsics.
  // X86cmpmm carries the mask as its fourth operand: an all-ones mask selects
  // the unmasked instruction, any other mask selects the "k" variant.
2579  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2580                      (_.KVT immAllOnesV)),
2581            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2582
2583  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2584            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2585                                                       _.RC:$src2, timm:$cc)>;
2586
2587  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2588                      (_.KVT immAllOnesV)),
2589            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2590
2591  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2592                      _.KRCWM:$mask),
2593            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2594                                                       addr:$src2, timm:$cc)>;
2595
2596  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2597                      (_.KVT immAllOnesV)),
2598            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2599
2600  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2601                      _.KRCWM:$mask),
2602            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2603                                                        addr:$src2, timm:$cc)>;
2604
2605  // Patterns for mask intrinsics with loads in other operand.
2606  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2607                      (_.KVT immAllOnesV)),
2608            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2609                                                      (X86cmpm_imm_commute timm:$cc))>;
2610
2611  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2612                      _.KRCWM:$mask),
2613            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2614                                                       _.RC:$src1, addr:$src2,
2615                                                       (X86cmpm_imm_commute timm:$cc))>;
2616
2617  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2618                      (_.KVT immAllOnesV)),
2619            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2620                                                       (X86cmpm_imm_commute timm:$cc))>;
2621
2622  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2623                      _.KRCWM:$mask),
2624            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2625                                                        _.RC:$src1, addr:$src2,
2626                                                        (X86cmpm_imm_commute  timm:$cc))>;
2627}
2628
// Defines rrib, the register-register "vcmp<Suffix>" form whose asm string
// carries {sae} and which is encoded with EVEX_B. It is matched from
// X86cmpmmSAE: the first pattern list requires an all-ones mask, the second
// takes an explicit $mask. The definition uses MXCSR but, unlike the forms in
// avx512_vcmp_common, does not set mayRaiseFPException.
2629multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2630  // comparison code form (VCMP[EQ/LT/LE/...])
2631  let Uses = [MXCSR] in
2632  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2633                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2634                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2635                     "vcmp"#_.Suffix,
2636                     "$cc, {sae}, $src2, $src1",
2637                     "$src1, $src2, {sae}, $cc",
2638                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2639                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2640                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2641                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2642                     EVEX_B, Sched<[sched]>;
2643}
2644
// Instantiates the packed FP compares for all three vector widths.
// The 512-bit form (Z) gets both avx512_vcmp_common and the {sae} form from
// avx512_vcmp_sae and is guarded by Pred; the 128- and 256-bit forms get only
// avx512_vcmp_common and additionally require HasVLX.
// Pred defaults to HasAVX512.
2645multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2646                       Predicate Pred = HasAVX512> {
2647  let Predicates = [Pred] in {
2648    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2649                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2650
2651  }
2652  let Predicates = [Pred,HasVLX] in {
2653   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2654   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2655  }
2656}
2657
// Packed FP compares for f64 (VCMPPD), f32 (VCMPPS) and f16 (VCMPPH) vectors.
// PD and PS use the default HasAVX512 predicate; PH overrides it with HasFP16.
2658defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2659                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2660defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2661                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2662defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2663                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2664
2665// Patterns to select fp compares with load as first operand.
// Each pattern matches a scalar X86cmpms whose FIRST operand is a load and
// selects the corresponding VCMPS*Zrm instruction with the register as $src1,
// the load address as the memory operand, and the immediate rewritten by
// X86cmpm_imm_commute. The f64/f32 patterns require HasAVX512; the f16
// pattern requires HasFP16.
2666let Predicates = [HasAVX512] in {
2667  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2668            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2669
2670  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2671            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2672}
2673
2674let Predicates = [HasFP16] in {
2675  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2676            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2677}
2678
2679// ----------------------------------------------------------------
2680// FPClass
2681
// Single-use variant of X86Vfpclasss: matches only when the node has exactly
// one use.
def X86Vfpclasss_su
    : PatFrag<(ops node:$src1, node:$src2),
              (X86Vfpclasss node:$src1, node:$src2),
              [{ return N->hasOneUse(); }]>;
2686
// Single-use variant of X86Vfpclass: matches only when the node has exactly
// one use.
def X86Vfpclass_su
    : PatFrag<(ops node:$src1, node:$src2),
              (X86Vfpclass node:$src1, node:$src2),
              [{ return N->hasOneUse(); }]>;
2691
2692//handle fpclass instruction  mask =  op(reg_scalar,imm)
2693//                                    op(mem_scalar,imm)
// Defines four records, all guarded by `prd` and all writing a mask register:
//   rr  / rrk - register source, unmasked / write-masked.
//   rm  / rmk - scalar memory source, unmasked / write-masked.
// Unmasked forms match X86Vfpclasss; masked forms match
// (and $mask, X86Vfpclasss_su ...), i.e. the single-use fragment.
2694multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2695                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2696                                 Predicate prd> {
2697  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2698      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2699                      (ins _.RC:$src1, i32u8imm:$src2),
2700                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2702                              (i32 timm:$src2)))]>,
2703                      Sched<[sched]>;
2704      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2705                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2706                      OpcodeStr#_.Suffix#
2707                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2708                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2709                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2710                                      (i32 timm:$src2))))]>,
2711                      EVEX_K, Sched<[sched]>;
2712    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2713                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2714                    OpcodeStr#_.Suffix#
2715                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2716                    [(set _.KRC:$dst,
2717                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2718                                        (i32 timm:$src2)))]>,
2719                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2720    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2721                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2722                    OpcodeStr#_.Suffix#
2723                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2724                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2725                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2726                            (i32 timm:$src2))))]>,
2727                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2728  }
2729}
2730
2731//handle fpclass instruction mask = fpclass(reg_vec, imm)
2732//                                  fpclass(mem_vec, imm)
2733//                                  fpclass(broadcast(eltVt), imm)
// Every form takes one vector source ($src1) plus an immediate ($src2) and
// writes a mask register; each has a write-masked "k" twin whose pattern
// matches (and $mask, X86Vfpclass_su ...).
//   rr  / rrk  - register source.
//   rm  / rmk  - full-vector load; the mnemonic gets an optional "{mem}"
//                suffix.
//   rmb / rmbk - broadcast scalar load, encoded with EVEX_B.
// `mem` is the width letter appended to the mnemonic (callers pass "x", "y"
// or "z").
2734multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2735                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2736                                 string mem>{
2737  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2738  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2739                      (ins _.RC:$src1, i32u8imm:$src2),
2740                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2741                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2742                                       (i32 timm:$src2)))]>,
2743                      Sched<[sched]>;
2744  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2745                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2746                      OpcodeStr#_.Suffix#
2747                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2748                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2749                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2750                                       (i32 timm:$src2))))]>,
2751                      EVEX_K, Sched<[sched]>;
2752  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2753                    (ins _.MemOp:$src1, i32u8imm:$src2),
2754                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2755                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2756                    [(set _.KRC:$dst,(X86Vfpclass
2757                                     (_.VT (_.LdFrag addr:$src1)),
2758                                     (i32 timm:$src2)))]>,
2759                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2760  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2761                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2762                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2763                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2764                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2765                                  (_.VT (_.LdFrag addr:$src1)),
2766                                  (i32 timm:$src2))))]>,
2767                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2768  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2769                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2770                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2771                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2772                                                  #_.BroadcastStr#", $src2}",
2773                    [(set _.KRC:$dst,(X86Vfpclass
2774                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2775                                     (i32 timm:$src2)))]>,
2776                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2777  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2778                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2779                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2780                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2781                                                   _.BroadcastStr#", $src2}",
2782                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2783                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2784                                     (i32 timm:$src2))))]>,
2785                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2786  }
2787
2788  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2789  // the memory form.
  // The four aliases below map the suffixed mnemonic onto rr, rrk, rmb and
  // rmbk respectively, and are restricted to the "att" asm variant.
2790  def : InstAlias<OpcodeStr#_.Suffix#mem#
2791                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2792                  (!cast<Instruction>(NAME#"rr")
2793                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2794  def : InstAlias<OpcodeStr#_.Suffix#mem#
2795                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2796                  (!cast<Instruction>(NAME#"rrk")
2797                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2798  def : InstAlias<OpcodeStr#_.Suffix#mem#
2799                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2800                  _.BroadcastStr#", $src2}",
2801                  (!cast<Instruction>(NAME#"rmb")
2802                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2803  def : InstAlias<OpcodeStr#_.Suffix#mem#
2804                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2805                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2806                  (!cast<Instruction>(NAME#"rmbk")
2807                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2808}
2809
// Instantiates avx512_vector_fpclass for the three vector widths, passing the
// width letter used in the mnemonic suffix: "z" for 512-bit (guarded by
// `prd`), "x" for 128-bit and "y" for 256-bit (both guarded by `prd` plus
// HasVLX).
2810multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2811                                     bits<8> opc, X86SchedWriteWidths sched,
2812                                     Predicate prd>{
2813  let Predicates = [prd] in {
2814    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2815                                      _.info512, "z">, EVEX_V512;
2816  }
2817  let Predicates = [prd, HasVLX] in {
2818    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2819                                      _.info128, "x">, EVEX_V128;
2820    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2821                                      _.info256, "y">, EVEX_V256;
2822  }
2823}
2824
// Instantiates every fpclass variant for one mnemonic:
//   PH / PS / PD    - packed f16 / f32 / f64 via avx512_vector_fpclass_all,
//                     using opcVec.
//   SHZ / SSZ / SDZ - scalar f16 / f32 / f64 via avx512_scalar_fpclass,
//                     using opcScalar and the scalar scheduling class.
// The f16 variants are guarded by HasFP16; the f32/f64 variants by HasDQI.
2825multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2826                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2827  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2828                                      sched, HasFP16>,
2829                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2830  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2831                                   sched.Scl, f16x_info, HasFP16>,
2832                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2833  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2834                                      sched, HasDQI>,
2835                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2836  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2837                                      sched, HasDQI>,
2838                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
2839  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2840                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2841                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2842  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2843                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2844                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
2845}
2846
2847defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2848
2849//-----------------------------------------------------------------
2850// Mask register copy, including
2851// - copy between mask registers
2852// - load/store mask registers
2853// - copy from GPR to mask register and vice versa
2854//
// Defines the three mask-register move forms for one mask width:
//   kk - KRC to KRC register move (isMoveReg, no selection pattern).
//   km - load from x86memop into KRC; selected for a load of type `vvt`.
//   mk - store of KRC to x86memop; selected for a store of a KRC value.
// opc_kk / opc_km / opc_mk are the opcodes of the respective forms.
2855multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2856                         string OpcodeStr, RegisterClass KRC,
2857                         ValueType vvt, X86MemOperand x86memop> {
  // The `let` below has no braces, so it applies to the kk def only.
2858  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2859  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2860             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2861             Sched<[WriteMove]>;
2862  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2863             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2864             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2865             Sched<[WriteLoad]>;
2866  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2867             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2868             [(store KRC:$src, addr:$dst)]>,
2869             Sched<[WriteStore]>;
2870}
2871
// Defines the moves between a general-purpose register class (GRC) and a
// mask register class (KRC) for one mask width:
//   kr - GRC to KRC (opcode opc_kr).
//   rk - KRC to GRC (opcode opc_rk).
// Neither def carries a selection pattern; both are scheduled as WriteMove.
2872multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2873                             string OpcodeStr,
2874                             RegisterClass KRC, RegisterClass GRC> {
2875  let hasSideEffects = 0 in {
2876    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2877               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2878               Sched<[WriteMove]>;
2879    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2880               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2881               Sched<[WriteMove]>;
2882  }
2883}
2884
// Per-width instantiations of the mask moves. Feature gating:
//   KMOVB         - HasDQI
//   KMOVW         - HasAVX512
//   KMOVD, KMOVQ  - HasBWI
// The byte, word and dword GPR forms all use GR32; only KMOVQ uses GR64.
2885let Predicates = [HasDQI] in
2886  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2887               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2888               VEX, PD;
2889
2890let Predicates = [HasAVX512] in
2891  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2892               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2893               VEX, PS;
2894
// For KMOVD/KMOVQ the mask/memory forms and the GPR forms are separate defms
// because they take different prefix modifiers (PD/PS + VEX_W vs. XD).
2895let Predicates = [HasBWI] in {
2896  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2897               VEX, PD, VEX_W;
2898  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2899               VEX, XD;
2900  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2901               VEX, PS, VEX_W;
2902  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2903               VEX, XD, VEX_W;
2904}
2905
2906// GR from/to mask register
// i16 <-> v16i1: the GPR side is always handled as a 32-bit register. A GR16
// source is first inserted into an undefined GR32 (sub_16bit); a GR16/GR8
// result is extracted from the GR32 copy of the mask (sub_16bit / sub_8bit).
2907def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2908          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2909def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2910          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2911def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2912          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2913
// i8 <-> v8i1: same scheme, using the sub_8bit subregister.
2914def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2915          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2916def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2917          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2918
// Extending a bitcast v16i1: the zext forms go through KMOVWrk (wrapped in
// SUBREG_TO_REG for i64); the anyext forms only need a register-class copy
// (inserted into an undefined 64-bit register for i64).
2919def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2920          (KMOVWrk VK16:$src)>;
2921def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2922          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2923def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2924          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2925def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2926          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2927
// Extending a bitcast v8i1: the zext forms use KMOVBrk and therefore require
// HasDQI; the anyext forms carry no Requires clause.
2928def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2929          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2930def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2931          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2932def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2933          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2934def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2935          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2936
// i32 <-> v32i1 and i64 <-> v64i1 are plain register-class copies.
2937def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2938          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2939def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2940          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2941def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2942          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2943def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2944          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2945
2946// Load/store kreg
// With DQI, loads of the sub-byte mask types (v1i1/v2i1/v4i1) use the byte
// mask load KMOVBkm and copy the result to the narrower mask class.
2947let Predicates = [HasDQI] in {
2948  def : Pat<(v1i1 (load addr:$src)),
2949            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2950  def : Pat<(v2i1 (load addr:$src)),
2951            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2952  def : Pat<(v4i1 (load addr:$src)),
2953            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2954}
2955
// With plain AVX512, a v8i1 bitcast from an i8 load goes through a GPR
// (MOVZX32rm8) and a register-class copy, so no byte mask load is needed;
// a v16i1 bitcast from an i16 load uses KMOVWkm directly.
2956let Predicates = [HasAVX512] in {
2957  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2958            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2959  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2960            (KMOVWkm addr:$src)>;
2961}
2962
// ISD::EXTRACT_VECTOR_ELT constrained to the mask-extract shape: one result
// of type i8, a vector operand whose elements are i1, and a pointer-typed
// index operand.
2963def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2964                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2965                                              SDTCVecEltisVT<1, i1>,
2966                                              SDTCisPtrTy<2>]>>;
2967
// GPR <-> mask copies for scalar_to_vector and for extracting element 0,
// instantiated for every mask type from v1i1 to v64i1. All gated on
// HasAVX512.
2968let Predicates = [HasAVX512] in {
2969  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    // scalar_to_vector from a 32-bit GPR: plain register-class copy.
2970    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2971              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2972
    // scalar_to_vector from an 8-bit GPR: place it in the low byte of an
    // undefined 32-bit register first, then copy to the mask class.
2973    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2974              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2975
    // Extract element 0 as i8: copy the mask to GR32 and take its low byte.
2976    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2977              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2978
    // Same extract any-extended to i32: the GR32 copy is used as is.
2979    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2980              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2981  }
2982
2983  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2984  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2985  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2986  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2987  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2988  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2989  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2990
  // Inserting a GR8-derived v1i1 at index 0 of an all-zeros v16i1: AND the
  // (widened) GPR with 1 and move the result to a mask register with KMOVWkr.
2991  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2992                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2993            (KMOVWkr (AND32ri8
2994                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2995                      (i32 1)))>;
2996}
2997
2998// Mask unary operation
2999// - KNOT
// Defines a single register-to-register mask unary instruction `rr` on KRC,
// selected for (OpNode KRC:$src), scheduled as `sched` and gated on `prd`.
3000multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
3001                            RegisterClass KRC, SDPatternOperator OpNode,
3002                            X86FoldableSchedWrite sched, Predicate prd> {
3003  let Predicates = [prd] in
3004    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
3005               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3006               [(set KRC:$dst, (OpNode KRC:$src))]>,
3007               Sched<[sched]>;
3008}
3009
// Instantiates a mask unary op at all four mask widths, appending the
// b/w/d/q width letter to OpcodeStr. Feature gating per width: B requires
// HasDQI, W requires HasAVX512, D and Q require HasBWI.
3010multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3011                                SDPatternOperator OpNode,
3012                                X86FoldableSchedWrite sched> {
3013  defm B : avx512_mask_unop<opc, OpcodeStr#"b", VK8,  OpNode, sched, HasDQI>,
3014                            VEX, PD;
3015  defm W : avx512_mask_unop<opc, OpcodeStr#"w", VK16, OpNode, sched, HasAVX512>,
3016                            VEX, PS;
3017  defm D : avx512_mask_unop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3018                            VEX, PD, VEX_W;
3019  defm Q : avx512_mask_unop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3020                            VEX, PS, VEX_W;
3021}
3022
3023// TODO - do we need a X86SchedWriteWidths::KMASK type?
3024defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3025
3026// Without DQI (e.g. KNL) there is no KNOTB, so the 8-bit mask is promoted to 16-bit.
// The `let` has no braces, so it applies to this first pattern only.
3027let Predicates = [HasAVX512, NoDQI] in
3028def : Pat<(vnot VK8:$src),
3029          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3030
// Mask types narrower than 8 bits have no KNOT form of their own: widen to
// VK16, use KNOTW, and copy the result back to the source's register class.
3031def : Pat<(vnot VK4:$src),
3032          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3033def : Pat<(vnot VK2:$src),
3034          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// The result class must match the v1i1 source (VK1), as in the VK2/VK4
// patterns above; it was previously VK2.
3035def : Pat<(vnot VK1:$src),
3036          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
3037
3038// Mask binary operation
3039// - KAND, KANDN, KOR, KXNOR, KXOR
// Defines a single two-source mask instruction `rr` on KRC, selected for
// (OpNode KRC:$src1, KRC:$src2), scheduled as `sched` and gated on `prd`.
// IsCommutable is forwarded to the instruction's isCommutable flag.
3040multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3041                           RegisterClass KRC, SDPatternOperator OpNode,
3042                           X86FoldableSchedWrite sched, Predicate prd,
3043                           bit IsCommutable> {
3044  let Predicates = [prd], isCommutable = IsCommutable in
3045    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3046               !strconcat(OpcodeStr,
3047                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3048               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
3049               Sched<[sched]>;
3050}
3051
// Instantiates a mask binary op at all four mask widths, appending the
// b/w/d/q width letter to OpcodeStr. Feature gating per width: B requires
// HasDQI, W requires prdW (HasAVX512 unless overridden), D and Q require
// HasBWI.
3052multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3053                                 SDPatternOperator OpNode,
3054                                 X86FoldableSchedWrite sched, bit IsCommutable,
3055                                 Predicate prdW = HasAVX512> {
3056  defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8,  OpNode, sched,
3057                             HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3058  defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode, sched,
3059                             prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3060  defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode, sched,
3061                             HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3062  defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode, sched,
3063                             HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3064}
3065
3066// These nodes use 'vnot' instead of 'not' to support vectors.
//   vandn(i0, i1) = and(vnot(i0), i1)
//   vxnor(i0, i1) = vnot(xor(i0, i1))
3067def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3068def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3069
3070// TODO - do we need a X86SchedWriteWidths::KMASK type?
// The last positional argument is IsCommutable; only KANDN is marked
// non-commutable. KADD overrides prdW so that its W form requires HasDQI.
3071defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3072defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3073defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3074defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3075defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3076defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3077
// Fallback selection patterns that perform a mask binary operation on a
// narrow mask type with the 16-bit instruction `Inst`: both operands are
// copied to VK16, `Inst` is applied, and the result is copied back to the
// original mask class.
3078multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3079                            Instruction Inst> {
3080  // With AVX512F only, an 8-bit mask is promoted to a 16-bit mask;
3081  // with the DQI set, this type is legal and the KxxxB instruction is used.
  // The brace-less `let` applies to the VK8 pattern only.
3082  let Predicates = [NoDQI] in
3083  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3084            (COPY_TO_REGCLASS
3085              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3086                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3087
3088  // All types smaller than 8 bits require conversion anyway.
3089  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3090        (COPY_TO_REGCLASS (Inst
3091                           (COPY_TO_REGCLASS VK1:$src1, VK16),
3092                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3093  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3094        (COPY_TO_REGCLASS (Inst
3095                           (COPY_TO_REGCLASS VK2:$src1, VK16),
3096                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3097  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3098        (COPY_TO_REGCLASS (Inst
3099                           (COPY_TO_REGCLASS VK4:$src1, VK16),
3100                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3101}
3102
// Narrow-mask fallbacks for the logical mask operations, each mapped to the
// corresponding 16-bit (W) instruction.
3103defm : avx512_binop_pat<and,   KANDWrr>;
3104defm : avx512_binop_pat<vandn, KANDNWrr>;
3105defm : avx512_binop_pat<or,    KORWrr>;
3106defm : avx512_binop_pat<vxnor, KXNORWrr>;
3107defm : avx512_binop_pat<xor,   KXORWrr>;
3108
3109// Mask unpacking
// Defines a kunpck<Suffix> instruction (opcode 0x4b) that takes two Src-width
// masks and produces one Dst-width mask, plus the pattern that selects it
// for concat_vectors of two Src masks. Gated on `prd`.
3110multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3111                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3112                             Predicate prd> {
3113  let Predicates = [prd] in {
3114    let hasSideEffects = 0 in
3115    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3116               (ins Src.KRC:$src1, Src.KRC:$src2),
3117               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3118               VEX_4V, VEX_L, Sched<[sched]>;
3119
    // The instruction receives the concat_vectors operands in swapped order
    // ($src2 first). NOTE(review): presumably because the instruction's first
    // source supplies the upper half of the result - confirm against the ISA
    // reference.
3120    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3121              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3122  }
3123}
3124
// Unpack instantiations: 8+8 -> 16 (HasAVX512), 16+16 -> 32 and
// 32+32 -> 64 (both HasBWI).
3125defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3126defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3127defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3128
3129// Mask bit testing
// Defines a two-source mask test instruction `rr` that has no register
// output: it defines EFLAGS and is selected for
// (set EFLAGS, (OpNode KRC:$src1, KRC:$src2)). Gated on `prd`.
3130multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3131                              SDNode OpNode, X86FoldableSchedWrite sched,
3132                              Predicate prd> {
3133  let Predicates = [prd], Defs = [EFLAGS] in
3134    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3135               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3136               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3137               Sched<[sched]>;
3138}
3139
// Instantiates a mask test op at all four mask widths, appending the
// b/w/q/d width letter to OpcodeStr. Feature gating per width: B requires
// HasDQI, W requires prdW (HasAVX512 unless overridden), Q and D require
// HasBWI.
3140multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3141                                X86FoldableSchedWrite sched,
3142                                Predicate prdW = HasAVX512> {
3143  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3144                                                                VEX, PD;
3145  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3146                                                                VEX, PS;
3147  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3148                                                                VEX, PS, VEX_W;
3149  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3150                                                                VEX, PD, VEX_W;
3151}
3152
3153// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST keeps the default prdW (HasAVX512) for its W form; KTEST overrides
// prdW so that its W form requires HasDQI.
3154defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3155defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3156
3157// Mask shift
// Defines a mask shift-by-immediate instruction `ri` on KRC, selected for
// (OpNode KRC:$src, (i8 timm:$imm)). The def itself sets
// Predicates = [HasAVX512].
3158multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3159                               SDNode OpNode, X86FoldableSchedWrite sched> {
3160  let Predicates = [HasAVX512] in
3161    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3162                 !strconcat(OpcodeStr,
3163                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3164                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3165                 Sched<[sched]>;
3166}
3167
// Instantiates a mask shift at all four mask widths. The W and B forms share
// opc1 and the Q and D forms share opc2; within each pair the forms differ by
// VEX_W (set on W and Q, clear on B and D).
// NOTE(review): avx512_mask_shiftop already sets Predicates = [HasAVX512] on
// its def; confirm how that interacts with the outer HasDQI / HasBWI `let`s
// used here for the B and Q/D forms.
3168multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3169                                 SDNode OpNode, X86FoldableSchedWrite sched> {
3170  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3171                               sched>, VEX, TAPD, VEX_W;
3172  let Predicates = [HasDQI] in
3173  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3174                               sched>, VEX, TAPD;
3175  let Predicates = [HasBWI] in {
3176  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3177                               sched>, VEX, TAPD, VEX_W;
3178  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3179                               sched>, VEX, TAPD;
3180  }
3181}
3182
// Left shift uses opcodes 0x32 (W/B) and 0x33 (Q/D); right shift uses
// 0x30 (W/B) and 0x31 (Q/D).
3183defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3184defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3185
3186// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Parameters: Frag / Frag_su are the compare fragments matched by the
// unmasked and the masked pattern respectively; InstStr is the instruction
// name prefix to which "Zrri" / "Zrrik" is appended; Narrow / Wide are the
// vector infos of the operand type and of the register type actually used.
3187multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3188                                                 string InstStr,
3189                                                 X86VectorVTInfo Narrow,
3190                                                 X86VectorVTInfo Wide> {
// Unmasked compare: each Narrow operand is inserted into an undefined Wide
// register, the Wide "Zrri" instruction is used, and the resulting mask is
// copied to Narrow.KRC. The matched condition is passed via X86pcmpm_imm.
3191def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3192                                (Narrow.VT Narrow.RC:$src2), cond)),
3193          (COPY_TO_REGCLASS
3194           (!cast<Instruction>(InstStr#"Zrri")
3195            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3196            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3197            (X86pcmpm_imm $cc)), Narrow.KRC)>;
3198
// Compare ANDed with a mask: uses the masked "Zrrik" form, with the mask
// copied to Wide.KRC.
3199def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3200                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3201                                                    (Narrow.VT Narrow.RC:$src2),
3202                                                    cond)))),
3203          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3204           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3205           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3206           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3207           (X86pcmpm_imm $cc)), Narrow.KRC)>;
3208}
3209
// Integer compares of a Narrow vector against a broadcast load, performed
// with the Wide broadcast-memory instructions "Zrmib" (unmasked) and
// "Zrmibk" (masked). The register operand is inserted into an undefined Wide
// register; the result mask is copied to Narrow.KRC. When the broadcast is
// the first compare operand, the register operand still goes first in the
// instruction and the condition is passed via X86pcmpm_imm_commute instead
// of X86pcmpm_imm.
3210multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3211                                                     string InstStr,
3212                                                     X86VectorVTInfo Narrow,
3213                                                     X86VectorVTInfo Wide> {
3214// Broadcast load.
3215def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3216                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3217          (COPY_TO_REGCLASS
3218           (!cast<Instruction>(InstStr#"Zrmib")
3219            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3220            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3221
// Broadcast load, result ANDed with a mask.
3222def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3223                           (Narrow.KVT
3224                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3225                                         (Narrow.BroadcastLdFrag addr:$src2),
3226                                         cond)))),
3227          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3228           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3229           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3230           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3231
3232// Commuted with broadcast load.
3233def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3234                                (Narrow.VT Narrow.RC:$src1),
3235                                cond)),
3236          (COPY_TO_REGCLASS
3237           (!cast<Instruction>(InstStr#"Zrmib")
3238            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3239            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3240
// Commuted with broadcast load, result ANDed with a mask.
3241def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3242                           (Narrow.KVT
3243                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3244                                         (Narrow.VT Narrow.RC:$src1),
3245                                         cond)))),
3246          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3247           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3248           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3249           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3250}
3251
3252// Same as above, but for fp types which don't use PatFrags.
// Matches X86cmpm / X86cmpm_su directly with a timm:$cc condition operand.
// Six patterns: register-register, broadcast load, and commuted broadcast
// load, each in an unmasked and a mask-ANDed variant. Instruction suffixes
// used: "Zrri"/"Zrrik" and "Zrmbi"/"Zrmbik". The commuted variants pass the
// condition through X86cmpm_imm_commute.
3253multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3254                                                X86VectorVTInfo Narrow,
3255                                                X86VectorVTInfo Wide> {
// Register-register compare.
3256def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3257                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3258          (COPY_TO_REGCLASS
3259           (!cast<Instruction>(InstStr#"Zrri")
3260            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3261            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3262            timm:$cc), Narrow.KRC)>;
3263
// Register-register compare, result ANDed with a mask.
3264def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3265                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3266                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3267          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3268           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3269           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3270           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3271           timm:$cc), Narrow.KRC)>;
3272
3273// Broadcast load.
3274def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3275                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3276          (COPY_TO_REGCLASS
3277           (!cast<Instruction>(InstStr#"Zrmbi")
3278            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3279            addr:$src2, timm:$cc), Narrow.KRC)>;
3280
// Broadcast load, result ANDed with a mask.
3281def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3282                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3283                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3284          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3285           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3286           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3287           addr:$src2, timm:$cc), Narrow.KRC)>;
3288
3289// Commuted with broadcast load.
3290def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3291                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3292          (COPY_TO_REGCLASS
3293           (!cast<Instruction>(InstStr#"Zrmbi")
3294            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3295            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3296
// Commuted with broadcast load, result ANDed with a mask.
3297def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3298                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3299                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3300          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3301           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3302           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3303           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3304}
3305
// Without VLX, compares on the narrower dword/qword integer and f32/f64
// vector types are widened to the corresponding 512-bit instruction.
// Signed (X86pcmpm) and unsigned (X86pcmpum) integer compares are
// instantiated for each narrow type, in both the register-register and the
// broadcast-load flavour.
3306let Predicates = [HasAVX512, NoVLX] in {
3307  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3308  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3309
3310  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3311  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3312
3313  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3314  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3315
3316  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3317  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3318
  // Broadcast-load variants of the integer compares above.
3319  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3320  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3321
3322  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3323  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3324
3325  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3326  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3327
3328  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3329  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3330
  // Floating-point compares (register and broadcast forms in one multiclass).
3331  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3332  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3333  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3334  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3335}
3336
// With BWI but without VLX, byte and word compares on the narrower vector
// types are widened the same way. Only the register-register lowering is
// instantiated here; no broadcast-load variant is instantiated for
// byte/word elements.
3337let Predicates = [HasBWI, NoVLX] in {
3338  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3339  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3340
3341  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3342  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3343
3344  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3345  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3346
3347  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3348  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3349}
3350
3351// Mask setting all 0s or 1s
// Defines a pseudo instruction (no assembly string, no inputs) that sets a
// KRC register to the constant `Val` of type VT. It is marked
// rematerializable and as cheap as a move, is scheduled as WriteZero, and is
// gated on HasAVX512.
3352multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3353  let Predicates = [HasAVX512] in
3354    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3355        SchedRW = [WriteZero] in
3356      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3357                     [(set KRC:$dst, (VT Val))]>;
3358}
3359
// Instantiates the mask-set pseudo for the 16-, 32- and 64-bit mask types
// (W, D, Q). No 8-bit variant is defined here.
3360multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3361  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3362  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3363  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3364}
3365
// KSET0* materializes an all-zeros mask, KSET1* an all-ones mask.
3366defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3367defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3368
3369// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants of the 8-bit and narrower mask types are
// materialized with the 16-bit pseudo (KSET0W / KSET1W) and copied to the
// narrower mask register class.
3370let Predicates = [HasAVX512] in {
3371  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3372  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3373  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3374  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3375  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3376  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3377  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3378  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3379}
3380
3381// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// For a (subVT, VT) pair of mask types, both index-0 subvector operations
// are lowered to a register-class copy:
//   extract_subvector VT -> subVT at index 0:  copy RC to subRC.
//   insert_subvector of subVT into undef VT at index 0:  copy subRC to RC.
3382multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3383                                             RegisterClass RC, ValueType VT> {
3384  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3385            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3386
3387  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3388            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3389}
// Instantiated for every (narrower, wider) pair of mask types from v1i1 up
// to v64i1, grouped by the narrower type.
3390defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3391defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3392defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3393defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3394defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3395defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3396
3397defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3398defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3399defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3400defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3401defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3402
3403defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3404defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3405defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3406defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3407
3408defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3409defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3410defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3411
3412defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3413defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3414
3415defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3416
3417//===----------------------------------------------------------------------===//
3418// AVX-512 - Aligned and unaligned load and store
3419//
3420
// Load-side forms of an AVX-512 vector move for a single vector type `_`.
//
// Records defined (all use opcode `opc` and mnemonic `OpcodeStr`):
//   rr   - unmasked register-to-register move; no selection pattern.
//   rrkz - zero-masked register move, selected from SelectOprr with an
//          all-zeros false operand.
//   rm   - unmasked load, selected from ld_frag unless NoRMPattern is set.
//   rrk  - merge-masked register move ($src0 is tied to $dst).
//   rmk  - merge-masked load ($src0 is tied to $dst).
//   rmkz - zero-masked load.
// followed by anonymous patterns that select the masked-load fragment `mload`
// onto rmkz / rmk.
//
// Parameters:
//   Name         - defm prefix; combined with _.ZSuffix to look the generated
//                  records up again through !cast.
//   ld_frag      - load fragment matched by rm / rmk / rmkz.
//   mload        - masked-load fragment matched by the trailing patterns.
//   Sched        - scheduling info; .RR is used for register forms and .RM
//                  for memory forms.
//   EVEX2VEXOvrd - prefix passed (with "rr"/"rm" appended) to
//                  EVEX2VEXOverride on the unmasked forms.
//   NoRMPattern  - when set, rm is defined with an empty pattern list.
//   SelectOprr   - select operator used in the rrk / rrkz patterns.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  // Unmasked register move.
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked register move: unselected lanes become zero.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  // Unmasked load.
  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms: the pass-through value $src0 shares a register with
  // the destination.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect_mask _.KRCWM:$mask,
                          (_.VT (ld_frag addr:$src1)),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  // Zero-masked load.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // Masked load with an undef pass-through: use the zero-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with an all-zeros pass-through.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with a register pass-through: use the merge-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3487
// Aligned-load variants of avx512_load for all three vector lengths.
// Z (512-bit) requires only `prd`; Z256 and Z128 additionally require HasVLX.
// Each width uses that width's AlignedLdFrag and masked_load_aligned.
// The EVEX2VEX override prefix is empty for Z, EVEX2VEXOvrd#"Y" for Z256 and
// EVEX2VEXOvrd for Z128.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}
3506
// Unaligned-load variants of avx512_load for all three vector lengths.
// Z (512-bit) requires only `prd`; Z256 and Z128 additionally require HasVLX.
// Each width uses that width's LdFrag and masked_load. SelectOprr is
// forwarded to avx512_load so callers can choose the select operator used by
// the register-to-register masked forms.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3526
// Store-side forms of an AVX-512 vector move for a single vector type `_`.
//
// Records defined (all use opcode `opc` and mnemonic `OpcodeStr`):
//   rr_REV / rrk_REV / rrkz_REV
//        - register-to-register forms in the MRMDestReg encoding. They are
//          isCodeGenOnly with ForceDisassemble set, carry no pattern, and
//          name the corresponding load-side record through FoldGenData.
//   mr   - unmasked store, selected from st_frag unless NoMRPattern is set.
//   mrk  - masked store; marked NotMemoryFoldable and selected by the
//          anonymous `mstore` pattern below.
// plus ".s"-suffixed InstAliases that map onto the three *_REV records.
//
// BaseName is the defm prefix; combined with _.ZSuffix it is used to refer to
// sibling records through !cast / FoldGenData.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  // Unmasked store. The brace-less `let` applies to `mr` only.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked store; its selection pattern is the anonymous Pat below.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  // "<mnemonic>.s" assembler aliases for the MRMDestReg register forms.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
3581
// Unaligned-store variants of avx512_store for all three vector lengths,
// using `store` and `masked_store`. Z (512-bit) requires only `prd`; Z256 and
// Z128 additionally require HasVLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3599
// Aligned-store variants of avx512_store for all three vector lengths, using
// `alignedstore` and `masked_store_aligned`. Z (512-bit) requires only `prd`;
// Z256 and Z128 additionally require HasVLX.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3618
// Floating-point vector moves. Each defm combines a load-side multiclass and
// a store-side multiclass under one name, so both sets of records share the
// VMOVxxx prefix.

// Aligned packed single: load opcode 0x28, store opcode 0x29.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

// Aligned packed double: load opcode 0x28, store opcode 0x29.
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned packed single: load opcode 0x10, store opcode 0x11.
// SelectOprr is null_frag here, so the register-to-register masked forms are
// not given the default vselect pattern (presumably so that those selects are
// matched by the aligned forms instead - TODO confirm).
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;

// Unaligned packed double: load opcode 0x10, store opcode 0x11.
// SelectOprr is null_frag, as for VMOVUPS.
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3642
// Integer vector moves. All use load opcode 0x6F and store opcode 0x7F; the
// variants differ in prefix (PD / XD / XS), VEX_W and element size.

// Aligned, 32-bit elements. The unmasked rm/mr forms get no selection pattern
// (NoRMPattern / NoMRPattern = 1).
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

// Aligned, 64-bit elements. Unmasked rm/mr patterns are kept.
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, 8-bit elements; requires HasBWI. No unmasked rm/mr patterns.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

// Unaligned, 16-bit elements; requires HasBWI. No unmasked rm/mr patterns.
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned, 32-bit elements. No unmasked rm/mr patterns; SelectOprr is
// null_frag for the register-to-register masked forms.
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

// Unaligned, 64-bit elements. Unmasked rm/mr patterns are kept; SelectOprr is
// null_frag for the register-to-register masked forms.
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3682
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.

// Load pseudos: 128-bit and 256-bit, aligned (VMOVAPS) and unaligned
// (VMOVUPS). They have no assembly string and no selection pattern.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Store pseudos mirroring the load pseudos above.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}
3708
// A select whose *true* operand is all-zeros keeps $src only in the lanes
// where the mask is clear. Lower it to a zero-masked move under the inverted
// mask. For the v8i1 mask, the value is copied to VK16 for KNOTWrr and the
// result is copied back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3728
// Lowers a masked select on a Narrow vector type through the Wide form of a
// masked move instruction:
//   1. each Narrow register operand is placed in the Narrow.SubRegIdx
//      subregister of an otherwise undefined Wide register,
//   2. the mask is copied to the Wide write-mask register class,
//   3. the Wide instruction InstrStr#"rrk" (register false operand) or
//      InstrStr#"rrkz" (all-zeros false operand) is issued, and
//   4. the Narrow result is extracted from the same subregister.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge-masking: false operand is a register.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zero-masking: false operand is all-zeros.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}
3750
// Patterns for handling masked selects of 128-bit and 256-bit vectors when
// VLX isn't available. Use a 512-bit operation and extract.

// 32-bit and 64-bit element types: only AVX512F is required.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// 8-bit and 16-bit element types (including f16 and bf16): BWI is required.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
}
3778
// Unmasked 512-bit loads and stores for vector types that are not the
// instruction's own type: i32/i16/i8 element vectors are selected as the
// 64-bit-element integer moves (VMOVDQA64 / VMOVDQU64), and f16/bf16 vectors
// as the packed-single moves (VMOVAPS / VMOVUPS). Aligned fragments map to
// the aligned instruction, plain load/store to the unaligned one.
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32f16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv32bf16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32f16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv32bf16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3824
// Unmasked 128-bit and 256-bit loads and stores, available with VLX, for
// vector types that are not the instruction's own type: i32/i16/i8 element
// vectors are selected as the 64-bit-element integer moves (VMOVDQA64 /
// VMOVDQU64), and f16/bf16 vectors as the packed-single moves (VMOVAPS /
// VMOVUPS). Aligned fragments map to the aligned instruction, plain
// load/store to the unaligned one.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv8bf16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv8bf16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv16bf16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv16bf16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3914
// Masked select / masked load / masked store patterns for a 16-bit
// floating-point vector family `_` (instantiated for f16 and bf16). All of
// them are selected onto the VMOVDQU16 masked forms:
//   * vselect of two registers            -> rrk
//   * vselect of a register and zeros     -> rrkz
//   * vselect of a load and a register    -> rmk  (aligned or unaligned load)
//   * vselect of a load and zeros         -> rmkz (aligned or unaligned load)
//   * masked_load with register pass-thru -> rmk
//   * masked_load with undef/zero pass-thru -> rmkz
//   * masked_store                        -> mrk
// The 512-bit patterns require HasBWI; the 256-bit and 128-bit patterns
// require HasBWI and HasVLX.
multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
// 512-bit vectors, 32-element mask.
let Predicates = [HasBWI] in {
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                     (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                     (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
let Predicates = [HasBWI, HasVLX] in {
  // 256-bit vectors, 16-element mask.
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                     (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                     (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  // 128-bit vectors, 8-element mask.
  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                     (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                     (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
}
3997
// Instantiate the masked-move lowering patterns once for the f16 vector type
// family and once for the bf16 vector type family.
3998defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3999defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
4000
4001// Move Int Doubleword to Packed Double Int
4002//
4003let ExeDomain = SSEPackedInt in {
// vmovd GR32 -> XMM: selected for a v4i32 scalar_to_vector of a GR32.
4004def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
4005                      "vmovd\t{$src, $dst|$dst, $src}",
4006                      [(set VR128X:$dst,
4007                        (v4i32 (scalar_to_vector GR32:$src)))]>,
4008                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd m32 -> XMM: same as above with the scalar loaded via loadi32.
4009def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
4010                      "vmovd\t{$src, $dst|$dst, $src}",
4011                      [(set VR128X:$dst,
4012                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
4013                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq GR64 -> XMM: selected for a v2i64 scalar_to_vector of a GR64.
4014def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
4015                      "vmovq\t{$src, $dst|$dst, $src}",
4016                        [(set VR128X:$dst,
4017                          (v2i64 (scalar_to_vector GR64:$src)))]>,
4018                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// vmovq m64 -> XMM on opcode 0x6E. It has no selection pattern, so mayLoad and
// hasSideEffects are stated explicitly; it is isCodeGenOnly but still
// ForceDisassemble.
4019let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
4020def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
4021                      (ins i64mem:$src),
4022                      "vmovq\t{$src, $dst|$dst, $src}", []>,
4023                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// isCodeGenOnly variants typed on FR64X, used to select i64 <-> f64
// bitconverts between GR64 and the scalar FP register class.
4024let isCodeGenOnly = 1 in {
4025def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
4026                       "vmovq\t{$src, $dst|$dst, $src}",
4027                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
4028                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
4029def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
4030                         "vmovq\t{$src, $dst|$dst, $src}",
4031                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
4032                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
4033}
4034} // ExeDomain = SSEPackedInt
4035
4036// Move Int Doubleword to Single Scalar
4037//
4038let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// vmovd GR32 -> FR32X: selects the i32 -> f32 bitconvert. Same opcode (0x6E)
// as VMOVDI2PDIZrr but typed on the scalar FP register class.
4039def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
4040                      "vmovd\t{$src, $dst|$dst, $src}",
4041                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
4042                      EVEX, Sched<[WriteVecMoveFromGpr]>;
4043} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4044
4045// Move doubleword from xmm register to r/m32
4046//
4047let ExeDomain = SSEPackedInt in {
// vmovd XMM -> GR32: selected for extracting element 0 of a v4i32.
4048def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4049                       "vmovd\t{$src, $dst|$dst, $src}",
4050                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4051                                        (iPTR 0)))]>,
4052                       EVEX, Sched<[WriteVecMoveToGpr]>;
// vmovd XMM -> m32: selected for storing element 0 of a v4i32.
4053def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
4054                       (ins i32mem:$dst, VR128X:$src),
4055                       "vmovd\t{$src, $dst|$dst, $src}",
4056                       [(store (i32 (extractelt (v4i32 VR128X:$src),
4057                                     (iPTR 0))), addr:$dst)]>,
4058                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4059} // ExeDomain = SSEPackedInt
4060
4061// Move quadword from xmm1 register to r/m64
4062//
4063let ExeDomain = SSEPackedInt in {
// vmovq XMM -> GR64 (opcode 0x7E): selected for extracting element 0 of a
// v2i64. Requires HasAVX512.
4064def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4065                      "vmovq\t{$src, $dst|$dst, $src}",
4066                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4067                                                   (iPTR 0)))]>,
4068                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
4069                      Requires<[HasAVX512]>;
4070
// vmovq XMM -> m64 on opcode 0x7E. It has no selection pattern, so mayStore
// and hasSideEffects are stated explicitly; it is isCodeGenOnly but still
// ForceDisassemble, and is limited to 64-bit mode.
// EVEX_CD8<64, CD8VT1> gives the 64-bit memory operand the same compressed
// disp8 scaling as the other 64-bit scalar vmovq memory forms in this file
// (VMOV64toPQIZrm, VMOVPQI2QIZmr). Without it an 8-bit displacement is not
// scaled by the element size when this encoding is decoded.
4071let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4072def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4073                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4074                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
4075                      Requires<[HasAVX512, In64BitMode]>;
4076
// vmovq XMM -> m64 on opcode 0xD6: selected for storing element 0 of a v2i64.
4077def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4078                      (ins i64mem:$dst, VR128X:$src),
4079                      "vmovq\t{$src, $dst|$dst, $src}",
4080                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4081                              addr:$dst)]>,
4082                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
4083                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4084
// Register-to-register vmovq on the store opcode (0xD6, MRMDestReg). It has no
// selection pattern and is isCodeGenOnly, but ForceDisassemble keeps it
// visible to the disassembler. The "vmovq.s" alias in this file maps to it.
4085let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4086def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4087                             (ins VR128X:$src),
4088                             "vmovq\t{$src, $dst|$dst, $src}", []>,
4089                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
4090} // ExeDomain = SSEPackedInt
4091
// Accept the "vmovq.s" mnemonic as a way to request the 0xD6 register-register
// encoding (VMOVPQI2QIZrr). The trailing 0 means the alias is parse-only and
// is never used when printing.
4092def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4093                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4094
4095let Predicates = [HasAVX512] in {
      // An X86vextractstore64 of a v2i64 is selected to the 64-bit vmovq
      // store form.
4096  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4097            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
4098}
4099
4100// Move Scalar Single to Double Int
4101//
4102let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// vmovd FR32X -> GR32: selects the f32 -> i32 bitconvert. Same opcode (0x7E)
// as VMOVPDI2DIZrr but typed on the scalar FP register class.
4103def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4104                      (ins FR32X:$src),
4105                      "vmovd\t{$src, $dst|$dst, $src}",
4106                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4107                      EVEX, Sched<[WriteVecMoveToGpr]>;
4108} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4109
4110// Move Quadword Int to Packed Quadword Int
4111//
4112let ExeDomain = SSEPackedInt in {
// vmovq m64 -> XMM on opcode 0x7E with the XS prefix class: selected for a
// v2i64 scalar_to_vector of a loadi64.
4113def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4114                      (ins i64mem:$src),
4115                      "vmovq\t{$src, $dst|$dst, $src}",
4116                      [(set VR128X:$dst,
4117                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4118                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4119} // ExeDomain = SSEPackedInt
4120
4121// Allow "vmovd" but print "vmovq".
// Parse-only aliases (emit flag 0): "vmovd" with a GR64 operand is accepted
// for both transfer directions and maps onto the corresponding vmovq
// instruction.
4122def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4123                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4124def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4125                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4126
4127// Conversions between masks and scalar fp.
// f32 <-> v32i1: there is no direct pattern between FR32X and a mask register
// here, so the value is routed through a GPR (vmovd, then kmovd, or the
// reverse).
4128def : Pat<(v32i1 (bitconvert FR32X:$src)),
4129          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4130def : Pat<(f32 (bitconvert VK32:$src)),
4131          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4132
// f64 <-> v64i1: same routing through a GPR, using the 64-bit vmovq/kmovq
// forms.
4133def : Pat<(v64i1 (bitconvert FR64X:$src)),
4134          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4135def : Pat<(f64 (bitconvert VK64:$src)),
4136          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4137
4138//===----------------------------------------------------------------------===//
4139// AVX-512  MOVSH, MOVSS, MOVSD
4140//===----------------------------------------------------------------------===//
4141
// Defines the scalar move instruction family for one element type.
//   asm         - mnemonic used in every asm string.
//   OpNode      - SDNode matched by the register-register forms.
//   vzload_frag - load fragment matched by the vector-typed load form (rm).
//   _           - X86VectorVTInfo supplying register classes, types, the
//                 scalar memory operand and the execution domain.
//   prd         - predicate guarding the family (defaults to HasAVX512).
// Forms produced: rr, rrkz, rrk, rm, rm_alt, rmk, rmkz, mr, mrk.
4142multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4143                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
      // Unmasked register form. When prd is HasFP16 only HasFP16 is required;
      // for any other prd the form is additionally restricted to OptForSize.
4144  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
4145  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4146             (ins _.RC:$src1, _.RC:$src2),
4147             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4148             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4149             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
4150  let Predicates = [prd] in {
      // Zero-masked register form: X86selects between the OpNode result and
      // an all-zeros vector.
4151  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4152              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4153              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4154              "$dst {${mask}} {z}, $src1, $src2}"),
4155              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4156                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4157                                      _.ImmAllZerosV)))],
4158              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
      // Merge-masked register form: X86selects between the OpNode result and
      // $src0, which is tied to $dst.
4159  let Constraints = "$src0 = $dst"  in
4160  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4161             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4162             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4163             "$dst {${mask}}, $src1, $src2}"),
4164             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4165                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4166                                     (_.VT _.RC:$src0))))],
4167             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
      // Unmasked loads; both are foldable as a load and rematerializable.
4168  let canFoldAsLoad = 1, isReMaterializable = 1 in {
      // Vector-typed load: matches vzload_frag and produces _.RC.
4169  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4170             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4171             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4172             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4173  // _alt version uses FR32/FR64 register class.
4174  let isCodeGenOnly = 1 in
4175  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4176                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4177                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4178                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4179  }
      // Masked loads carry no selection pattern here, so mayLoad and
      // hasSideEffects are stated explicitly.
4180  let mayLoad = 1, hasSideEffects = 0 in {
        // Merge-masked load: $src0 is tied to $dst.
4181    let Constraints = "$src0 = $dst" in
4182    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4183               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4184               !strconcat(asm, "\t{$src, $dst {${mask}}|",
4185               "$dst {${mask}}, $src}"),
4186               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
        // Zero-masked load.
4187    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4188               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4189               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4190               "$dst {${mask}} {z}, $src}"),
4191               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4192  }
      // Unmasked store of the scalar register class (_.FRC), opcode 0x11.
4193  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4194             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4195             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4196             EVEX, Sched<[WriteFStore]>;
      // Masked store: no selection pattern (mayStore stated explicitly), the
      // mask operand is VK1WM, and the form is marked NotMemoryFoldable.
4197  let mayStore = 1, hasSideEffects = 0 in
4198  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4199              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4200              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4201              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
4202              NotMemoryFoldable;
4203  }
4204}
4205
// Instantiate the scalar move family for f32 (vmovss), f64 (vmovsd) and f16
// (vmovsh). Each instantiation supplies its own prefix/map and EVEX_CD8
// element size; vmovsh is additionally gated on HasFP16.
4206defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4207                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4208
4209defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4210                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4211
4212defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4213                                  HasFP16>,
4214                                  VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
4215
// Patterns that fold a scalar X86selects feeding a scalar move (OpNode) into
// the masked register forms of the instruction named by InstrStr.
//   InstrStr - instruction name prefix; "rrk"/"rrkz" are appended.
//   OpNode   - the scalar move node being matched.
//   ZeroFP   - leaf matching the floating-point zero of the element type.
//   _        - vector type info for the 128-bit vector being produced.
4216multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4217                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
4218
    // select(mask, src1, src2) inserted into $src0 -> merge-masked move with
    // $src2 as the pass-through operand.
4219def : Pat<(_.VT (OpNode _.RC:$src0,
4220                        (_.VT (scalar_to_vector
4221                                  (_.EltVT (X86selects VK1WM:$mask,
4222                                                       (_.EltVT _.FRC:$src1),
4223                                                       (_.EltVT _.FRC:$src2))))))),
4224          (!cast<Instruction>(InstrStr#rrk)
4225                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4226                        VK1WM:$mask,
4227                        (_.VT _.RC:$src0),
4228                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4229
    // select(mask, src1, 0.0) inserted into $src0 -> zero-masked move.
4230def : Pat<(_.VT (OpNode _.RC:$src0,
4231                        (_.VT (scalar_to_vector
4232                                  (_.EltVT (X86selects VK1WM:$mask,
4233                                                       (_.EltVT _.FRC:$src1),
4234                                                       (_.EltVT ZeroFP))))))),
4235          (!cast<Instruction>(InstrStr#rrkz)
4236                        VK1WM:$mask,
4237                        (_.VT _.RC:$src0),
4238                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4239}
4240
// Selects a 512-bit masked_store whose data is a 128-bit value inserted at
// index 0 of an undef vector, and whose mask matches the dag Mask, to the
// scalar masked store InstrStr#mrk.
//   Mask   - dag describing the expected mask computation; it binds $mask.
//   MaskRC - register class of $mask, which is copied directly to VK1WM.
4241multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4242                                        dag Mask, RegisterClass MaskRC> {
4243
4244def : Pat<(masked_store
4245             (_.info512.VT (insert_subvector undef,
4246                               (_.info128.VT _.info128.RC:$src),
4247                               (iPTR 0))), addr:$dst, Mask),
4248          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4249                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4250                      _.info128.RC:$src)>;
4251
4252}
4253
// Same store lowering as avx512_store_scalar_lowering, except that $mask is
// first placed into an i32 value via INSERT_SUBREG at index `subreg` (over an
// IMPLICIT_DEF) before being copied to VK1WM.
4254multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4255                                               AVX512VLVectorVTInfo _,
4256                                               dag Mask, RegisterClass MaskRC,
4257                                               SubRegIndex subreg> {
4258
4259def : Pat<(masked_store
4260             (_.info512.VT (insert_subvector undef,
4261                               (_.info128.VT _.info128.RC:$src),
4262                               (iPTR 0))), addr:$dst, Mask),
4263          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4264                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4265                      _.info128.RC:$src)>;
4266
4267}
4268
4269// This matches the more recent codegen from clang that avoids emitting a 512
4270// bit masked store directly. Codegen will widen 128-bit masked store to 512
4271// bits on AVX512F only targets.
// Two store patterns sharing one output: Mask512 describes the mask of the
// widened 512-bit masked_store, Mask128 the mask of the 128-bit masked_store.
// Both select InstrStr#mrk, with $mask widened to i32 via INSERT_SUBREG at
// `subreg` and then copied to VK1WM.
4272multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4273                                               AVX512VLVectorVTInfo _,
4274                                               dag Mask512, dag Mask128,
4275                                               RegisterClass MaskRC,
4276                                               SubRegIndex subreg> {
4277
4278// AVX512F pattern.
4279def : Pat<(masked_store
4280             (_.info512.VT (insert_subvector undef,
4281                               (_.info128.VT _.info128.RC:$src),
4282                               (iPTR 0))), addr:$dst, Mask512),
4283          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4284                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4285                      _.info128.RC:$src)>;
4286
4287// AVX512VL pattern.
4288def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4289          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4290                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4291                      _.info128.RC:$src)>;
4292}
4293
// Selects the low 128 bits of a 512-bit masked_load, whose mask matches the
// dag Mask, to the scalar masked loads of InstrStr.
//   Mask   - dag describing the expected mask computation; it binds $mask.
//   MaskRC - register class of $mask, which is copied directly to VK1WM.
4294multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4295                                       dag Mask, RegisterClass MaskRC> {
4296
    // All-zeros pass-through -> zero-masked load (rmkz).
4297def : Pat<(_.info128.VT (extract_subvector
4298                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4299                                        _.info512.ImmAllZerosV)),
4300                           (iPTR 0))),
4301          (!cast<Instruction>(InstrStr#rmkz)
4302                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4303                      addr:$srcAddr)>;
4304
    // Pass-through is X86vzmovl of $src inserted at index 0 of undef ->
    // merge-masked load (rmk) with $src as the pass-through register.
4305def : Pat<(_.info128.VT (extract_subvector
4306                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4307                      (_.info512.VT (insert_subvector undef,
4308                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4309                            (iPTR 0))))),
4310                (iPTR 0))),
4311          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4312                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4313                      addr:$srcAddr)>;
4314
4315}
4316
// Same load lowering as avx512_load_scalar_lowering, except that $mask is
// first placed into an i32 value via INSERT_SUBREG at index `subreg` (over an
// IMPLICIT_DEF) before being copied to VK1WM.
4317multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4318                                              AVX512VLVectorVTInfo _,
4319                                              dag Mask, RegisterClass MaskRC,
4320                                              SubRegIndex subreg> {
4321
    // All-zeros pass-through -> zero-masked load (rmkz).
4322def : Pat<(_.info128.VT (extract_subvector
4323                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4324                                        _.info512.ImmAllZerosV)),
4325                           (iPTR 0))),
4326          (!cast<Instruction>(InstrStr#rmkz)
4327                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4328                      addr:$srcAddr)>;
4329
    // X86vzmovl($src) pass-through -> merge-masked load (rmk).
4330def : Pat<(_.info128.VT (extract_subvector
4331                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4332                      (_.info512.VT (insert_subvector undef,
4333                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4334                            (iPTR 0))))),
4335                (iPTR 0))),
4336          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4337                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4338                      addr:$srcAddr)>;
4339
4340}
4341
4342// This matches the more recent codegen from clang that avoids emitting a 512
4343// bit masked load directly. Codegen will widen 128-bit masked load to 512
4344// bits on AVX512F only targets.
// Four load patterns: Mask512 describes the mask of the widened 512-bit
// masked_load, Mask128 the mask of the 128-bit masked_load. For each width an
// all-zeros pass-through selects InstrStr#rmkz and an X86vzmovl($src)
// pass-through selects InstrStr#rmk. $mask is widened to i32 via
// INSERT_SUBREG at `subreg` and then copied to VK1WM.
4345multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4346                                              AVX512VLVectorVTInfo _,
4347                                              dag Mask512, dag Mask128,
4348                                              RegisterClass MaskRC,
4349                                              SubRegIndex subreg> {
4350// AVX512F patterns.
4351def : Pat<(_.info128.VT (extract_subvector
4352                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4353                                        _.info512.ImmAllZerosV)),
4354                           (iPTR 0))),
4355          (!cast<Instruction>(InstrStr#rmkz)
4356                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4357                      addr:$srcAddr)>;
4358
4359def : Pat<(_.info128.VT (extract_subvector
4360                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4361                      (_.info512.VT (insert_subvector undef,
4362                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4363                            (iPTR 0))))),
4364                (iPTR 0))),
4365          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4366                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4367                      addr:$srcAddr)>;
4368
4369// AVX512VL patterns.
4370def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4371                         _.info128.ImmAllZerosV)),
4372          (!cast<Instruction>(InstrStr#rmkz)
4373                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4374                      addr:$srcAddr)>;
4375
4376def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4377                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4378          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4379                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4380                      addr:$srcAddr)>;
4381}
4382
// Fold scalar selects into the masked vmovss/vmovsd register forms.
4383defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4384defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4385
// Scalar masked-store lowerings. Each instantiation supplies the mask shape to
// match: bit 0 of a GPR (and with 1) bitconverted to the mask vector type,
// with the GPR being GR32 (truncated to i16), GR16 or GR8.
4386defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4387                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4388defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4389                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4390defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4391                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4392
4393let Predicates = [HasFP16] in {
// f16 counterparts of the scalar move/store/load lowerings, all targeting the
// VMOVSHZ instructions. The masks matched are bit 0 of a GR32 bitconverted to
// v32i1, or bit 0 of a GR8 bitconverted to v8i1 (optionally inserted into an
// all-zeros v32i1).
4394defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4395defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4396                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4397defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4398                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4399defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4400                   (v32i1 (insert_subvector
4401                           (v32i1 immAllZerosV),
4402                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4403                           (iPTR 0))),
4404                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4405                   GR8, sub_8bit>;
4406
4407defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4408                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4409defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4410                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4411defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4412                   (v32i1 (insert_subvector
4413                           (v32i1 immAllZerosV),
4414                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4415                           (iPTR 0))),
4416                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4417                   GR8, sub_8bit>;
4418
// Scalar f16 select between two registers: both values are copied to VR128X
// and combined with the merge-masked vmovsh ($src2 is the pass-through, the
// first vector source is IMPLICIT_DEF); the result is copied back to FR16X.
4419def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4420          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4421           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4422           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4423           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4424
// Scalar f16 select against zero: uses the zero-masked vmovsh instead.
4425def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4426          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4427           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4428}
4429
// vmovss/vmovsd masked-store lowerings for a mask built from bit 0 of a GR8.
// The first dag is the 512-bit mask shape, the second the 128-bit shape
// (v4i1 for f32, v2i1 for f64), both extracted from the v8i1 bitconvert.
4430defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4431                   (v16i1 (insert_subvector
4432                           (v16i1 immAllZerosV),
4433                           (v4i1 (extract_subvector
4434                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4435                                  (iPTR 0))),
4436                           (iPTR 0))),
4437                   (v4i1 (extract_subvector
4438                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4439                          (iPTR 0))), GR8, sub_8bit>;
4440defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4441                   (v8i1
4442                    (extract_subvector
4443                     (v16i1
4444                      (insert_subvector
4445                       (v16i1 immAllZerosV),
4446                       (v2i1 (extract_subvector
4447                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4448                              (iPTR 0))),
4449                       (iPTR 0))),
4450                     (iPTR 0))),
4451                   (v2i1 (extract_subvector
4452                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4453                          (iPTR 0))), GR8, sub_8bit>;
4454
// Scalar masked-load lowerings for vmovss/vmovsd, using the same three mask
// shapes (GR32 truncated to i16, GR16, GR8) as the corresponding store
// lowerings.
4455defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4456                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4457defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4458                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4459defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4460                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4461
// vmovss/vmovsd masked-load lowerings for a mask built from bit 0 of a GR8.
// The first dag is the 512-bit mask shape, the second the 128-bit shape
// (v4i1 for f32, v2i1 for f64), both extracted from the v8i1 bitconvert.
4462defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4463                   (v16i1 (insert_subvector
4464                           (v16i1 immAllZerosV),
4465                           (v4i1 (extract_subvector
4466                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4467                                  (iPTR 0))),
4468                           (iPTR 0))),
4469                   (v4i1 (extract_subvector
4470                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4471                          (iPTR 0))), GR8, sub_8bit>;
4472defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4473                   (v8i1
4474                    (extract_subvector
4475                     (v16i1
4476                      (insert_subvector
4477                       (v16i1 immAllZerosV),
4478                       (v2i1 (extract_subvector
4479                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4480                              (iPTR 0))),
4481                       (iPTR 0))),
4482                     (iPTR 0))),
4483                   (v2i1 (extract_subvector
4484                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4485                          (iPTR 0))), GR8, sub_8bit>;
4486
// Scalar f32 selects. Register operands are copied to VR128X, combined with a
// masked vmovss, and the result is copied back to FR32X. In the register
// forms the false value ($src2) is the pass-through and the first vector
// source is IMPLICIT_DEF.
4487def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4488          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4489           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4490           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4491           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4492
// False value is 0.0: zero-masked register form.
4493def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4494          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4495           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4496
// True value is a load: fold it into the merge-masked / zero-masked load.
4497def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4498          (COPY_TO_REGCLASS
4499           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4500                                                       VK1WM:$mask, addr:$src)),
4501           FR32X)>;
4502def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4503          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4504
// Scalar f64 selects: same four shapes as the f32 patterns, using vmovsd and
// FR64X.
4505def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4506          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4507           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4508           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4509           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4510
4511def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4512          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4513           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4514
4515def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4516          (COPY_TO_REGCLASS
4517           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4518                                                       VK1WM:$mask, addr:$src)),
4519           FR64X)>;
4520def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4521          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4522
4523
// X86selects on whole v4f32/v2f64 values: selected to the merge-masked scalar
// move with $src2 as the pass-through and $src1 supplied as both vector
// sources.
4524def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4525          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4526def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4527          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4528
// Same select with an all-zeros false value: zero-masked scalar move.
4529def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4530          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4531def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4532          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4533
4534let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4535  let Predicates = [HasFP16] in {
        // Register-register vmovsh forms defined on the store opcode (0x11,
        // MRMDestReg) with no selection pattern. Each carries FoldGenData
        // naming the matching VMOVSHZ form defined on opcode 0x10.
4536    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4537        (ins VR128X:$src1, VR128X:$src2),
4538        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4539        []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4540        FoldGenData<"VMOVSHZrr">,
4541        Sched<[SchedWriteFShuffle.XMM]>;
4542
        // Merge-masked variant: $src0 is tied to $dst.
4543    let Constraints = "$src0 = $dst" in
4544    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4545        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4546         VR128X:$src1, VR128X:$src2),
4547        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4548          "$dst {${mask}}, $src1, $src2}",
4549        []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4550        FoldGenData<"VMOVSHZrrk">,
4551        Sched<[SchedWriteFShuffle.XMM]>;
4552
        // Zero-masked variant.
4553    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4554        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4555        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4556          "$dst {${mask}} {z}, $src1, $src2}",
4557        []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4558        FoldGenData<"VMOVSHZrrkz">,
4559        Sched<[SchedWriteFShuffle.XMM]>;
4560  }
4561  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4562                           (ins VR128X:$src1, VR128X:$src2),
4563                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4564                           []>, XS, EVEX_4V, VEX_LIG,
4565                           FoldGenData<"VMOVSSZrr">,
4566                           Sched<[SchedWriteFShuffle.XMM]>;
4567
4568  let Constraints = "$src0 = $dst" in
4569  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4570                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4571                                                   VR128X:$src1, VR128X:$src2),
4572                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4573                                        "$dst {${mask}}, $src1, $src2}",
4574                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4575                             FoldGenData<"VMOVSSZrrk">,
4576                             Sched<[SchedWriteFShuffle.XMM]>;
4577
4578  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4579                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4580                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4581                                    "$dst {${mask}} {z}, $src1, $src2}",
4582                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4583                         FoldGenData<"VMOVSSZrrkz">,
4584                         Sched<[SchedWriteFShuffle.XMM]>;
4585
4586  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4587                           (ins VR128X:$src1, VR128X:$src2),
4588                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4589                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4590                           FoldGenData<"VMOVSDZrr">,
4591                           Sched<[SchedWriteFShuffle.XMM]>;
4592
4593  let Constraints = "$src0 = $dst" in
4594  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4595                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4596                                                   VR128X:$src1, VR128X:$src2),
4597                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4598                                        "$dst {${mask}}, $src1, $src2}",
4599                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4600                             VEX_W, FoldGenData<"VMOVSDZrrk">,
4601                             Sched<[SchedWriteFShuffle.XMM]>;
4602
4603  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4604                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4605                                                          VR128X:$src2),
4606                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4607                                         "$dst {${mask}} {z}, $src1, $src2}",
4608                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4609                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
4610                              Sched<[SchedWriteFShuffle.XMM]>;
4611}
4612
// ".s"-suffixed assembler mnemonics that map onto the _REV records, in
// unmasked, {mask} and {mask} {z} forms for vmovsh, vmovss and vmovsd.
// The masked aliases list only $dst, $mask, $src1 and $src2; the tied $src0
// operand of the rrk records is not spelled out here.
// NOTE(review): the trailing 0 is the last InstAlias argument; presumably it
// keeps the alias from being chosen when printing - confirm against
// Target.td.
4613def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4614                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4615def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4616                             "$dst {${mask}}, $src1, $src2}",
4617                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4618                                VR128X:$src1, VR128X:$src2), 0>;
4619def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4620                             "$dst {${mask}} {z}, $src1, $src2}",
4621                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4622                                 VR128X:$src1, VR128X:$src2), 0>;
4623def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4624                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4625def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4626                             "$dst {${mask}}, $src1, $src2}",
4627                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4628                                VR128X:$src1, VR128X:$src2), 0>;
4629def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4630                             "$dst {${mask}} {z}, $src1, $src2}",
4631                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4632                                 VR128X:$src1, VR128X:$src2), 0>;
4633def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4634                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4635def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4636                             "$dst {${mask}}, $src1, $src2}",
4637                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4638                                VR128X:$src1, VR128X:$src2), 0>;
4639def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4640                             "$dst {${mask}} {z}, $src1, $src2}",
4641                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4642                                 VR128X:$src1, VR128X:$src2), 0>;
4643
// With HasAVX512 and OptForSize, X86vzmovl on vectors of 32-bit elements is
// selected as VMOVSSZrr whose first source is AVX512_128_SET0 and whose
// second source is the input. The integer (i32) element types use the same
// instruction as the f32 ones.
4644let Predicates = [HasAVX512, OptForSize] in {
4645  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4646            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4647  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4648            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4649
4650  // Move low f32 and clear high bits.
  // 256-bit inputs: the move runs on the xmm subregister taken with
  // EXTRACT_SUBREG and the result is wrapped with SUBREG_TO_REG.
4651  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4652            (SUBREG_TO_REG (i32 0),
4653             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4654              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4655  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4656            (SUBREG_TO_REG (i32 0),
4657             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4658              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4659
  // 512-bit inputs: same shape as the 256-bit patterns, starting from VR512.
4660  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4661            (SUBREG_TO_REG (i32 0),
4662             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4663              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4664  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4665            (SUBREG_TO_REG (i32 0),
4666             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4667              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4668}
4669
4670// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4671// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl cases are listed in this block. The blend runs
// on the xmm subregister against V_SET0 and SUBREG_TO_REG wraps the result.
// The f32 form uses VBLENDPSrri with immediate 1; the i32 form uses
// VPBLENDWrri with immediate 3 (presumably because the word blend needs two
// mask bits per 32-bit element - confirm against the ISA reference).
4672let Predicates = [HasAVX512, OptForSpeed] in {
4673  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4674            (SUBREG_TO_REG (i32 0),
4675             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4676                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4677                          (i8 1))), sub_xmm)>;
4678  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4679            (SUBREG_TO_REG (i32 0),
4680             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4681                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4682                          (i8 3))), sub_xmm)>;
4683}
4684
// Scalar FP load patterns under HasAVX512. scalar_to_vector of a loaded
// f32/f64 selects VMOVSSZrm/VMOVSDZrm directly; the wider X86vzload32/64
// results reuse the same instructions and wrap them with SUBREG_TO_REG.
4685let Predicates = [HasAVX512] in {
4686  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4687            (VMOVSSZrm addr:$src)>;
4688  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4689            (VMOVSDZrm addr:$src)>;
4690
4691  // Represent the same patterns above but in the form they appear for
4692  // 256-bit types
4693  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4694            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4695  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4696            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4697
4698  // Represent the same patterns above but in the form they appear for
4699  // 512-bit types
4700  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4701            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4702  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4703            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4704}
// HasFP16 patterns for 16-bit elements. X86vzmovl selects VMOVSHZrr with
// AVX512_128_SET0 as the first source (for both f16 and i16 element types),
// and X86vzload16 selects VMOVSHZrm. The 256/512-bit cases go through the
// xmm subregister and SUBREG_TO_REG. Unlike the 32-bit-element X86vzmovl
// patterns, this block carries no OptForSize/OptForSpeed predicate.
4705let Predicates = [HasFP16] in {
4706  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4707            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4708  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4709            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4710
4711  // FIXME we need better canonicalization in dag combine
4712  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4713            (SUBREG_TO_REG (i32 0),
4714             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4715              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4716  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4717            (SUBREG_TO_REG (i32 0),
4718             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4719              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4720
4721  // FIXME we need better canonicalization in dag combine
4722  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4723            (SUBREG_TO_REG (i32 0),
4724             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4725              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4726  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4727            (SUBREG_TO_REG (i32 0),
4728             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4729              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4730
  // Zero-extending 16-bit loads for the 128-, 256- and 512-bit f16 types.
4731  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4732            (VMOVSHZrm addr:$src)>;
4733
4734  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4735            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4736
4737  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4738            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4739}
4740
// EVEX register-to-register "vmovq" (opcode 0x7E, MRMSrcReg, VEX_W). Its
// selection pattern is X86vzmovl on a v2i64 held in VR128X. The enclosing
// let assigns the SSEPackedInt execution domain and the
// SchedWriteVecLogic.XMM scheduling class.
4741let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4742def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4743                                (ins VR128X:$src),
4744                                "vmovq\t{$src, $dst|$dst, $src}",
4745                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4746                                                   (v2i64 VR128X:$src))))]>,
4747                                EVEX, VEX_W;
4748}
4749
// HasAVX512 selection patterns that target VMOVDI2PDIZrr/rm, VMOV64toPQIZrr,
// VMOVQI2PQIZrm and VMOVZPQILo2PQIZrr: GPR-to-vector moves, zero-extending
// 32/64-bit loads (X86vzload32/64), and X86vzmovl on 64-bit-element vectors.
4750let Predicates = [HasAVX512] in {
  // anyext of a GR8: the byte is inserted into sub_8bit of an IMPLICIT_DEF
  // i32, so the remaining bits of that i32 are left undefined.
4751  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4752            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4753                                              GR8:$src, sub_8bit)))>;
4754  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4755            (VMOVDI2PDIZrr GR32:$src)>;
4756
4757  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4758            (VMOV64toPQIZrr GR64:$src)>;
4759
4760  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4761  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4762            (VMOVDI2PDIZrm addr:$src)>;
4763  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4764            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4765  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4766            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4767  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4768            (VMOVQI2PQIZrm addr:$src)>;
4769  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4770            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4771
4772  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4773  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4774            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4775  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4776            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4777
  // X86vzmovl on 256-bit vectors of 64-bit elements: apply the 128-bit
  // vmovq record to the xmm subregister and wrap it with SUBREG_TO_REG.
4778  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4779            (SUBREG_TO_REG (i32 0),
4780             (v2f64 (VMOVZPQILo2PQIZrr
4781                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4782             sub_xmm)>;
4783  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4784            (SUBREG_TO_REG (i32 0),
4785             (v2i64 (VMOVZPQILo2PQIZrr
4786                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4787             sub_xmm)>;
4788
  // Same for 512-bit vectors of 64-bit elements.
4789  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4790            (SUBREG_TO_REG (i32 0),
4791             (v2f64 (VMOVZPQILo2PQIZrr
4792                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4793             sub_xmm)>;
4794  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4795            (SUBREG_TO_REG (i32 0),
4796             (v2i64 (VMOVZPQILo2PQIZrr
4797                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4798             sub_xmm)>;
4799}
4800
4801//===----------------------------------------------------------------------===//
4802// AVX-512 - Non-temporals
4803//===----------------------------------------------------------------------===//
4804
// 512-bit "vmovntdqa" load: opcode 0x2A, MRMSrcMem, VR512 destination and
// i512mem source. The pattern list is empty, so this record selects nothing
// by itself.
4805def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4806                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4807                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4808                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4809
// 256-bit and 128-bit "vmovntdqa" loads, gated on HasVLX. They use the same
// opcode (0x2A), form (MRMSrcMem) and empty pattern list as the 512-bit
// record, with the register class, memory operand, scheduling width and
// EVEX_V* length changed to match.
4810let Predicates = [HasVLX] in {
4811  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4812                       (ins i256mem:$src),
4813                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4814                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4815                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4816
4817  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4818                      (ins i128mem:$src),
4819                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4820                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4821                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4822}
4823
// Defines a single store record, "mr", for a non-temporal store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand string.
//   _         - vector type info; supplies MemOp, RC, VT, ExeDomain and
//               EltSize.
//   Sched     - move/load/store scheduling info; only Sched.MR is used.
//   st_frag   - store fragment matched by the pattern; defaults to
//               alignednontemporalstore.
// The record is given AddedComplexity = 400.
4824multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4825                        X86SchedWriteMoveLS Sched,
4826                        PatFrag st_frag = alignednontemporalstore> {
4827  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4828  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4829                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4830                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4831                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4832}
4833
// Instantiates avx512_movnt at all three vector lengths.
//   VTInfo - per-length type infos (info512 / info256 / info128).
//   Sched  - per-length scheduling infos (ZMM / YMM / XMM).
// The Z form needs HasAVX512; Z256 and Z128 need HasAVX512 and HasVLX.
// st_frag is not passed, so avx512_movnt's default is used.
4834multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4835                           AVX512VLVectorVTInfo VTInfo,
4836                           X86SchedWriteMoveLSWidths Sched> {
4837  let Predicates = [HasAVX512] in
4838    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4839
4840  let Predicates = [HasAVX512, HasVLX] in {
4841    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4842    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4843  }
4844}
4845
// Non-temporal store instantiations. Only the i64, f64 and f32 element
// types are instantiated here: vmovntdq (0xE7, PD) on the i64 infos,
// vmovntpd (0x2B, PD, VEX_W) on the f64 infos and vmovntps (0x2B, PS) on
// the f32 infos.
4846defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4847                                SchedWriteVecMoveLSNT>, PD;
4848defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4849                                SchedWriteFMoveLSNT>, PD, VEX_W;
4850defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4851                                SchedWriteFMoveLSNT>, PS;
4852
// 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
// Aligned non-temporal stores of the v16i32, v32i16 and v64i8 types are
// routed to VMOVNTDQZmr, and aligned non-temporal loads of every 512-bit
// type listed below are routed to VMOVNTDQAZrm.
4853let Predicates = [HasAVX512], AddedComplexity = 400 in {
4854  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4855            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4856  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4857            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4858  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4859            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4860
4861  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4862            (VMOVNTDQAZrm addr:$src)>;
4863  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4864            (VMOVNTDQAZrm addr:$src)>;
4865  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4866            (VMOVNTDQAZrm addr:$src)>;
4867  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4868            (VMOVNTDQAZrm addr:$src)>;
4869  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4870            (VMOVNTDQAZrm addr:$src)>;
4871  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4872            (VMOVNTDQAZrm addr:$src)>;
4873}
4874
// 256-bit and 128-bit non-temporal patterns (HasVLX, AddedComplexity = 400).
// They mirror the 512-bit set: stores of the i32/i16/i8 element types go to
// VMOVNTDQZ256mr / VMOVNTDQZ128mr, and aligned non-temporal loads of every
// listed type go to VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
4875let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores and loads.
4876  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4877            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4878  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4879            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4880  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4881            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4882
4883  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4884            (VMOVNTDQAZ256rm addr:$src)>;
4885  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4886            (VMOVNTDQAZ256rm addr:$src)>;
4887  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4888            (VMOVNTDQAZ256rm addr:$src)>;
4889  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4890            (VMOVNTDQAZ256rm addr:$src)>;
4891  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4892            (VMOVNTDQAZ256rm addr:$src)>;
4893  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4894            (VMOVNTDQAZ256rm addr:$src)>;
4895
  // 128-bit stores and loads.
4896  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4897            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4898  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4899            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4900  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4901            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4902
4903  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4904            (VMOVNTDQAZ128rm addr:$src)>;
4905  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4906            (VMOVNTDQAZ128rm addr:$src)>;
4907  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4908            (VMOVNTDQAZ128rm addr:$src)>;
4909  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4910            (VMOVNTDQAZ128rm addr:$src)>;
4911  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4912            (VMOVNTDQAZ128rm addr:$src)>;
4913  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4914            (VMOVNTDQAZ128rm addr:$src)>;
4915}
4916
4917//===----------------------------------------------------------------------===//
4918// AVX-512 - Integer arithmetic
4919//
// Register-register (rr) and register-memory (rm) forms of a binary vector
// operation, each expanded through AVX512_maskable.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - SDNode applied to the two sources in the patterns.
//   _              - vector type info; supplies RC, VT, MemOp and LdFrag.
//   sched          - scheduling class; rr uses it directly, rm uses
//                    sched.Folded and sched.ReadAfterFold.
//   IsCommutable   - passed as two consecutive AVX512_maskable arguments for
//                    rr only; rm does not pass it.
// The two operand strings give the same operands in both orders
// ("$src2, $src1" and "$src1, $src2").
4920multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4921                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4922                           bit IsCommutable = 0> {
4923  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4924                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4925                    "$src2, $src1", "$src1, $src2",
4926                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4927                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4928                    Sched<[sched]>;
4929
4930  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4931                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4932                  "$src2, $src1", "$src1, $src2",
4933                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4934                  AVX512BIBase, EVEX_4V,
4935                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4936}
4937
// Extends avx512_binop_rm (which it inherits, forwarding all parameters)
// with a broadcast-memory form, "rmb". The second source is a
// _.ScalarMemOp loaded through _.BroadcastLdFrag, the assembly operand is
// suffixed with _.BroadcastStr, and the record is tagged EVEX_B.
4938multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4939                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4940                            bit IsCommutable = 0> :
4941           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4942  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4943                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4944                  "${src2}"#_.BroadcastStr#", $src1",
4945                  "$src1, ${src2}"#_.BroadcastStr,
4946                  (_.VT (OpNode _.RC:$src1,
4947                                (_.BroadcastLdFrag addr:$src2)))>,
4948                  AVX512BIBase, EVEX_4V, EVEX_B,
4949                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4950}
4951
// Instantiates avx512_binop_rm (rr and rm, no broadcast form) at all three
// vector lengths.
//   VTInfo - per-length type infos (info512 / info256 / info128).
//   sched  - per-length scheduling classes (ZMM / YMM / XMM).
//   prd    - feature predicate; Z needs prd, Z256/Z128 need prd and HasVLX.
4952multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4953                              AVX512VLVectorVTInfo VTInfo,
4954                              X86SchedWriteWidths sched, Predicate prd,
4955                              bit IsCommutable = 0> {
4956  let Predicates = [prd] in
4957    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4958                             IsCommutable>, EVEX_V512;
4959
4960  let Predicates = [prd, HasVLX] in {
4961    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4962                                sched.YMM, IsCommutable>, EVEX_V256;
4963    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4964                                sched.XMM, IsCommutable>, EVEX_V128;
4965  }
4966}
4967
// Same three-length instantiation as avx512_binop_rm_vl, but built on
// avx512_binop_rmb, so every length also gets the broadcast-memory (rmb)
// form. Z needs prd; Z256 and Z128 need prd and HasVLX.
4968multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4969                               AVX512VLVectorVTInfo VTInfo,
4970                               X86SchedWriteWidths sched, Predicate prd,
4971                               bit IsCommutable = 0> {
4972  let Predicates = [prd] in
4973    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4974                             IsCommutable>, EVEX_V512;
4975
4976  let Predicates = [prd, HasVLX] in {
4977    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4978                                 sched.YMM, IsCommutable>, EVEX_V256;
4979    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4980                                 sched.XMM, IsCommutable>, EVEX_V128;
4981  }
4982}
4983
// 64-bit-element wrapper: avx512_binop_rmb_vl on avx512vl_i64_info, tagged
// VEX_W and EVEX_CD8<64, CD8VF>. Includes the broadcast-memory form.
4984multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4985                                X86SchedWriteWidths sched, Predicate prd,
4986                                bit IsCommutable = 0> {
4987  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4988                                  sched, prd, IsCommutable>,
4989                                  VEX_W, EVEX_CD8<64, CD8VF>;
4990}
4991
// 32-bit-element wrapper: avx512_binop_rmb_vl on avx512vl_i32_info, tagged
// EVEX_CD8<32, CD8VF>. Includes the broadcast-memory form; no VEX_W here.
4992multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4993                                X86SchedWriteWidths sched, Predicate prd,
4994                                bit IsCommutable = 0> {
4995  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4996                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4997}
4998
// 16-bit-element wrapper: avx512_binop_rm_vl (rr and rm only, no broadcast
// form) on avx512vl_i16_info, tagged EVEX_CD8<16, CD8VF> and VEX_WIG.
4999multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
5000                                X86SchedWriteWidths sched, Predicate prd,
5001                                bit IsCommutable = 0> {
5002  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
5003                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
5004                                 VEX_WIG;
5005}
5006
// 8-bit-element wrapper: avx512_binop_rm_vl (rr and rm only, no broadcast
// form) on avx512vl_i8_info, tagged EVEX_CD8<8, CD8VF> and VEX_WIG.
5007multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
5008                                X86SchedWriteWidths sched, Predicate prd,
5009                                bit IsCommutable = 0> {
5010  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
5011                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
5012                                 VEX_WIG;
5013}
5014
// Paired 32/64-bit-element instantiation. Emits a Q family (opc_q, mnemonic
// OpcodeStr + "q") and a D family (opc_d, mnemonic OpcodeStr + "d"), both
// under the same predicate and scheduling classes.
5015multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
5016                                 SDNode OpNode, X86SchedWriteWidths sched,
5017                                 Predicate prd, bit IsCommutable = 0> {
5018  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
5019                                   IsCommutable>;
5020
5021  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
5022                                   IsCommutable>;
5023}
5024
// Paired 8/16-bit-element instantiation. Emits a W family (opc_w, mnemonic
// OpcodeStr + "w") and a B family (opc_b, mnemonic OpcodeStr + "b"), both
// under the same predicate and scheduling classes. Note the parameter order
// is (opc_b, opc_w).
5025multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
5026                                 SDNode OpNode, X86SchedWriteWidths sched,
5027                                 Predicate prd, bit IsCommutable = 0> {
5028  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
5029                                   IsCommutable>;
5030
5031  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
5032                                   IsCommutable>;
5033}
5034
// All four element widths in one instantiation. Opcodes are given in the
// order (opc_b, opc_w, opc_d, opc_q). The D/Q families are gated on
// HasAVX512 and the B/W families on HasBWI; this multiclass takes no
// predicate parameter of its own.
5035multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
5036                                  bits<8> opc_d, bits<8> opc_q,
5037                                  string OpcodeStr, SDNode OpNode,
5038                                  X86SchedWriteWidths sched,
5039                                  bit IsCommutable = 0> {
5040  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
5041                                    sched, HasAVX512, IsCommutable>,
5042              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
5043                                    sched, HasBWI, IsCommutable>;
5044}
5045
// Binary operation whose source, destination and broadcast types may
// differ. Defines rr, rm and rmb forms through AVX512_maskable.
//   _Src   - type info for both sources (RC, VT, MemOp, LdFrag).
//   _Dst   - type info for the result (RC, VT); also the info passed to
//            AVX512_maskable.
//   _Brdct - type info for the broadcast form (ScalarMemOp, BroadcastStr,
//            BroadcastLdFrag, VT).
//   IsCommutable - passed once to AVX512_maskable for rr only.
// Note the parameter order here is (opc, OpcodeStr, sched, OpNode, ...),
// which differs from avx512_binop_rm.
5046multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
5047                            X86FoldableSchedWrite sched,
5048                            SDNode OpNode,X86VectorVTInfo _Src,
5049                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
5050                            bit IsCommutable = 0> {
5051  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5052                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5053                            "$src2, $src1","$src1, $src2",
5054                            (_Dst.VT (OpNode
5055                                         (_Src.VT _Src.RC:$src1),
5056                                         (_Src.VT _Src.RC:$src2))),
5057                            IsCommutable>,
5058                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
5059  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5060                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5061                        "$src2, $src1", "$src1, $src2",
5062                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5063                                      (_Src.LdFrag addr:$src2)))>,
5064                        AVX512BIBase, EVEX_4V,
5065                        Sched<[sched.Folded, sched.ReadAfterFold]>;
5066
  // Broadcast form: the value is loaded as _Brdct.VT and bitconverted before
  // being used as the second source.
5067  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5068                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5069                    OpcodeStr,
5070                    "${src2}"#_Brdct.BroadcastStr#", $src1",
5071                     "$src1, ${src2}"#_Brdct.BroadcastStr,
5072                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5073                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5074                    AVX512BIBase, EVEX_4V, EVEX_B,
5075                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5076}
5077
// Integer arithmetic instantiations. For the *_vl_all users the four
// opcodes are in (b, w, d, q) order; for the *_vl_bw users they are in
// (b, w) order. The last argument of each is IsCommutable: 1 for the
// add/multiply/average operations and 0 for the subtract operations.
5078defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5079                                    SchedWriteVecALU, 1>;
5080defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5081                                    SchedWriteVecALU, 0>;
// Saturating add/subtract, byte and word element sizes only (HasBWI).
5082defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5083                                    SchedWriteVecALU, HasBWI, 1>;
5084defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5085                                    SchedWriteVecALU, HasBWI, 0>;
5086defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5087                                     SchedWriteVecALU, HasBWI, 1>;
5088defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5089                                     SchedWriteVecALU, HasBWI, 0>;
// Multiplies. Each element width is gated on a different feature predicate
// (HasAVX512 for d, HasBWI for w, HasDQI for q).
5090defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5091                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
5092defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5093                                    SchedWriteVecIMul, HasBWI, 1>;
5094defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5095                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
5096                                    NotEVEX2VEXConvertible;
5097defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5098                                    HasBWI, 1>;
5099defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5100                                     HasBWI, 1>;
5101defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5102                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
5103defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
5104                                   SchedWriteVecALU, HasBWI, 1>;
// Both of these use the 64-bit-element wrapper with X86-specific nodes.
5105defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5106                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5107defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5108                                     SchedWriteVecIMul, HasAVX512, 1>;
5109
// Instantiates avx512_binop_rm2 at all three vector lengths with separate
// source and destination type-info families.
//   _SrcVTInfo / _DstVTInfo - per-length source and destination type infos.
//   prd - feature predicate; Z needs prd, Z256/Z128 need HasVLX and prd.
// The broadcast type info is fixed to the 64-bit-element infos (v8i64_info,
// v4i64x_info, v2i64x_info), and every length is tagged
// EVEX_CD8<64, CD8VF> and VEX_W regardless of the source/destination types.
5110multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5111                            X86SchedWriteWidths sched,
5112                            AVX512VLVectorVTInfo _SrcVTInfo,
5113                            AVX512VLVectorVTInfo _DstVTInfo,
5114                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
5115  let Predicates = [prd] in
5116    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5117                                 _SrcVTInfo.info512, _DstVTInfo.info512,
5118                                 v8i64_info, IsCommutable>,
5119                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
5120  let Predicates = [HasVLX, prd] in {
5121    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5122                                      _SrcVTInfo.info256, _DstVTInfo.info256,
5123                                      v4i64x_info, IsCommutable>,
5124                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
5125    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5126                                      _SrcVTInfo.info128, _DstVTInfo.info128,
5127                                      v2i64x_info, IsCommutable>,
5128                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
5129  }
5130}
5131
// vpmultishiftqb (opcode 0x83, T8PD), gated on HasVBMI and not commutable.
// Source and destination both use the i8 type infos, while
// avx512_binop_all supplies 64-bit-element infos for the broadcast form.
5132defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5133                                avx512vl_i8_info, avx512vl_i8_info,
5134                                X86multishift, HasVBMI, 0>, T8PD;
5135
// Defines the broadcast-from-memory form ("rmb") of a binary op whose source
// type (_Src) differs from its destination type (_Dst).
//   $src1 is a _Src register; $src2 is a scalar memory operand
//   (_Src.ScalarMemOp) loaded through _Src.BroadcastLdFrag.
// The assembly operand strings append _Src.BroadcastStr to ${src2}. The
// instruction is marked EVEX_B and its folded-load scheduling is taken from
// sched.Folded / sched.ReadAfterFold.
5136multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5137                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5138                            X86FoldableSchedWrite sched> {
5139  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5140                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5141                    OpcodeStr,
5142                    "${src2}"#_Src.BroadcastStr#", $src1",
5143                     "$src1, ${src2}"#_Src.BroadcastStr,
5144                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5145                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5146                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5147                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5148}
5149
// Defines the register-register ("rr") and register-memory ("rm") forms of a
// binary op that reads two _Src-typed operands and produces a _Dst-typed
// result. Both forms go through AVX512_maskable with _Dst as the result info.
//   rr - both sources are _Src registers; IsCommutable is passed for the last
//        two AVX512_maskable arguments.
//   rm - $src2 is a full-width memory operand loaded through _Src.LdFrag; the
//        commutable arguments are left at AVX512_maskable's defaults.
// NOTE(review): the two trailing arguments are presumably the commutable flags
// for the unmasked and masked variants - confirm against AVX512_maskable.
5150multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5151                            SDNode OpNode,X86VectorVTInfo _Src,
5152                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5153                            bit IsCommutable = 0> {
5154  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5155                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5156                            "$src2, $src1","$src1, $src2",
5157                            (_Dst.VT (OpNode
5158                                         (_Src.VT _Src.RC:$src1),
5159                                         (_Src.VT _Src.RC:$src2))),
5160                            IsCommutable, IsCommutable>,
5161                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5162  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5163                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5164                        "$src2, $src1", "$src1, $src2",
5165                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5166                                      (_Src.LdFrag addr:$src2)))>,
5167                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5168                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5169}
5170
// Instantiates a dword-source / word-destination pack at all three widths.
// Each width combines avx512_packs_rm (rr, rm) with avx512_packs_rmb (rmb) and
// uses the SchedWriteShuffle class for that width. The 512-bit form requires
// HasBWI; the 256/128-bit forms require HasBWI and HasVLX.
5171multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5172                                    SDNode OpNode> {
5173  let Predicates = [HasBWI] in
5174  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5175                                 v32i16_info, SchedWriteShuffle.ZMM>,
5176                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5177                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5178  let Predicates = [HasBWI, HasVLX] in {
5179    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5180                                     v16i16x_info, SchedWriteShuffle.YMM>,
5181                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5182                                      v16i16x_info, SchedWriteShuffle.YMM>,
5183                                      EVEX_V256;
5184    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5185                                     v8i16x_info, SchedWriteShuffle.XMM>,
5186                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5187                                      v8i16x_info, SchedWriteShuffle.XMM>,
5188                                      EVEX_V128;
5189  }
5190}
// Instantiates a word-source / byte-destination pack at all three widths.
// Unlike the i32->i16 variant, only avx512_packs_rm is used (no rmb broadcast
// form is defined here) and every form is tagged VEX_WIG. The 512-bit form
// requires HasBWI; the 256/128-bit forms require HasBWI and HasVLX.
5191multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5192                            SDNode OpNode> {
5193  let Predicates = [HasBWI] in
5194  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5195                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
5196  let Predicates = [HasBWI, HasVLX] in {
5197    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5198                                     v32i8x_info, SchedWriteShuffle.YMM>,
5199                                     EVEX_V256, VEX_WIG;
5200    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5201                                     v16i8x_info, SchedWriteShuffle.XMM>,
5202                                     EVEX_V128, VEX_WIG;
5203  }
5204}
5205
// Instantiates a multiply-add style op at all three widths by reusing
// avx512_packs_rm (rr and rm forms only) with caller-supplied source and
// destination type infos. Scheduling uses SchedWriteVecIMul for each width and
// IsCommutable (default 0) is forwarded. The 512-bit form requires HasBWI; the
// 256/128-bit forms require HasBWI and HasVLX.
5206multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5207                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
5208                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
5209  let Predicates = [HasBWI] in
5210  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5211                                _Dst.info512, SchedWriteVecIMul.ZMM,
5212                                IsCommutable>, EVEX_V512;
5213  let Predicates = [HasBWI, HasVLX] in {
5214    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5215                                     _Dst.info256, SchedWriteVecIMul.YMM,
5216                                     IsCommutable>, EVEX_V256;
5217    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5218                                     _Dst.info128, SchedWriteVecIMul.XMM,
5219                                     IsCommutable>, EVEX_V128;
5220  }
5221}
5222
// Pack instructions: signed-saturating forms select X86Packss, unsigned-
// saturating forms select X86Packus. VPACKUSDW is the only one that uses
// AVX5128IBase rather than AVX512BIBase.
5223defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5224defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5225defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5226defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5227
// Multiply-add instructions. VPMADDUBSW (i8 -> i16) leaves IsCommutable at its
// default of 0; VPMADDWD (i16 -> i32) passes 1.
5228defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5229                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
5230defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5231                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
5232
// Packed integer min/max for byte, word, dword and qword elements, selected
// from the generic smax/umax/smin/umin nodes. Byte and word forms are gated on
// HasBWI; dword and qword forms on HasAVX512. Every qword form is additionally
// marked NotEVEX2VEXConvertible.
// NOTE(review): the trailing 1 is presumably the helpers' IsCommutable flag -
// confirm against the avx512_binop_rm_vl_* signatures.

// Signed maximum.
5233defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5234                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5235defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5236                                    SchedWriteVecALU, HasBWI, 1>;
5237defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5238                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5239defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5240                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5241                                    NotEVEX2VEXConvertible;
5242
// Unsigned maximum.
5243defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5244                                    SchedWriteVecALU, HasBWI, 1>;
5245defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5246                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5247defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5248                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5249defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5250                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5251                                    NotEVEX2VEXConvertible;
5252
// Signed minimum.
5253defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5254                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5255defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5256                                    SchedWriteVecALU, HasBWI, 1>;
5257defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5258                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5259defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5260                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5261                                    NotEVEX2VEXConvertible;
5262
// Unsigned minimum.
5263defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5264                                    SchedWriteVecALU, HasBWI, 1>;
5265defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5266                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5267defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5268                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5269defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5270                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5271                                    NotEVEX2VEXConvertible;
5272
5273// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern places the narrow register operand(s) into the low subregister
// of an otherwise undefined (IMPLICIT_DEF) v8i64 value, runs the 512-bit
// VPMULLQZ instruction, and extracts the same low subregister from the result.
// The broadcast-load patterns pass the address straight to VPMULLQZrmb, so
// only $src1 needs widening there.
5274let Predicates = [HasDQI, NoVLX] in {
  // 256-bit (v4i64) multiply via the ymm subregister.
5275  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5276            (EXTRACT_SUBREG
5277                (VPMULLQZrr
5278                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5279                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5280             sub_ymm)>;
5281  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5282            (EXTRACT_SUBREG
5283                (VPMULLQZrmb
5284                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5285                    addr:$src2),
5286             sub_ymm)>;
5287
  // 128-bit (v2i64) multiply via the xmm subregister.
5288  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5289            (EXTRACT_SUBREG
5290                (VPMULLQZrr
5291                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5292                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5293             sub_xmm)>;
5294  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5295            (EXTRACT_SUBREG
5296                (VPMULLQZrmb
5297                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5298                    addr:$src2),
5299             sub_xmm)>;
5300}
5301
// Emits patterns that implement 256-bit (v4i64) and 128-bit (v2i64) OpNode
// using a 512-bit instruction.
//   Instr  - name prefix of the 512-bit instruction; "rr" or "rmb" is appended
//            and the result is looked up with !cast<Instruction>.
//   OpNode - DAG node to match.
// The narrow register operand(s) are inserted into the low subregister of an
// IMPLICIT_DEF v8i64, the wide instruction is applied, and the same low
// subregister is extracted. The multiclass sets no predicates itself; the
// instantiation site supplies them.
5302multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit forms: register-register, then 64-bit broadcast load.
5303  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5304            (EXTRACT_SUBREG
5305                (!cast<Instruction>(Instr#"rr")
5306                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5307                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5308             sub_ymm)>;
5309  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5310            (EXTRACT_SUBREG
5311                (!cast<Instruction>(Instr#"rmb")
5312                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5313                    addr:$src2),
5314             sub_ymm)>;
5315
  // 128-bit forms: register-register, then 64-bit broadcast load.
5316  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5317            (EXTRACT_SUBREG
5318                (!cast<Instruction>(Instr#"rr")
5319                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5320                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5321             sub_xmm)>;
5322  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5323            (EXTRACT_SUBREG
5324                (!cast<Instruction>(Instr#"rmb")
5325                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5326                    addr:$src2),
5327             sub_xmm)>;
5328}
5329
// Without VLX, lower 128/256-bit qword min/max through the 512-bit
// VPMAXUQZ / VPMINUQZ / VPMAXSQZ / VPMINSQZ instructions.
5330let Predicates = [HasAVX512, NoVLX] in {
5331  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5332  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5333  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5334  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5335}
5336
5337//===----------------------------------------------------------------------===//
5338// AVX-512  Logical Instructions
5339//===----------------------------------------------------------------------===//
5340
// Bitwise logic instructions, instantiated through avx512_binop_rm_vl_dq
// (defined outside this chunk) with the same opcode given twice and gated on
// HasAVX512. VPAND/VPOR/VPXOR pass a trailing 1; VPANDN omits it and so uses
// the helper's default.
// NOTE(review): the trailing argument is presumably IsCommutable - confirm
// against the helper signature.
5341defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5342                                   SchedWriteVecLogic, HasAVX512, 1>;
5343defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5344                                  SchedWriteVecLogic, HasAVX512, 1>;
5345defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5346                                   SchedWriteVecLogic, HasAVX512, 1>;
5347defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5348                                    SchedWriteVecLogic, HasAVX512>;
5349
// Select 128-bit and 256-bit and/or/xor/andnp on byte and word element types
// to the Q-suffixed instructions (VPANDQ, VPORQ, VPXORQ, VPANDNQ). Every
// pattern in this block maps to a Q form; the operations are bitwise, so the
// element size named by the instruction does not affect the unmasked result.
5350let Predicates = [HasVLX] in {
  // 128-bit register-register forms.
5351  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5352            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5353  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5354            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5355
5356  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5357            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5358  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5359            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5360
5361  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5362            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5363  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5364            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5365
5366  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5367            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5368  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5369            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5370
  // 128-bit register-memory forms; the loadv* fragment fixes the vector type.
5371  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5372            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5373  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5374            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5375
5376  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5377            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5378  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5379            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5380
5381  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5382            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5383  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5384            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5385
5386  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5387            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5388  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5389            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5390
  // 256-bit register-register forms.
5391  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5392            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5393  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5394            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5395
5396  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5397            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5398  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5399            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5400
5401  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5402            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5403  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5404            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5405
5406  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5407            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5408  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5409            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5410
  // 256-bit register-memory forms.
5411  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5412            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5413  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5414            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5415
5416  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5417            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5418  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5419            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5420
5421  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5422            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5423  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5424            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5425
5426  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5427            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5428  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5429            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5430}
5431
// 512-bit counterpart of the byte/word logic patterns: v64i8 and v32i16
// and/or/xor/andnp are selected to the Q-suffixed Z instructions, for both
// register-register and register-memory operands.
5432let Predicates = [HasAVX512] in {
  // Register-register forms.
5433  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5434            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5435  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5436            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5437
5438  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5439            (VPORQZrr VR512:$src1, VR512:$src2)>;
5440  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5441            (VPORQZrr VR512:$src1, VR512:$src2)>;
5442
5443  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5444            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5445  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5446            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5447
5448  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5449            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5450  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5451            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5452
  // Register-memory forms; the loadv* fragment fixes the vector type.
5453  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5454            (VPANDQZrm VR512:$src1, addr:$src2)>;
5455  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5456            (VPANDQZrm VR512:$src1, addr:$src2)>;
5457
5458  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5459            (VPORQZrm VR512:$src1, addr:$src2)>;
5460  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5461            (VPORQZrm VR512:$src1, addr:$src2)>;
5462
5463  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5464            (VPXORQZrm VR512:$src1, addr:$src2)>;
5465  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5466            (VPXORQZrm VR512:$src1, addr:$src2)>;
5467
5468  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5469            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5470  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5471            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5472}
5473
5474// Patterns to catch vselect with different type than logic op.
//   InstrStr - instruction name prefix; the rrk/rrkz/rmk/rmkz suffix is
//              appended and resolved with !cast<Instruction>.
//   OpNode   - logic DAG node to match.
//   _        - type info of the vselect_mask (result type, mask class, RC).
//   IntInfo  - type info of the logic op, which is bitconverted to _.VT.
// Merge-masked patterns (passthrough $src0) select the "k" forms; patterns
// whose false operand is _.ImmAllZerosV select the zero-masked "kz" forms.
5475multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5476                                    X86VectorVTInfo _,
5477                                    X86VectorVTInfo IntInfo> {
5478  // Masked register-register logical operations.
5479  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5480                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5481                   _.RC:$src0)),
5482            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5483             _.RC:$src1, _.RC:$src2)>;
5484
5485  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5486                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5487                   _.ImmAllZerosV)),
5488            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5489             _.RC:$src2)>;
5490
5491  // Masked register-memory logical operations.
5492  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5493                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5494                                            (load addr:$src2)))),
5495                   _.RC:$src0)),
5496            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5497             _.RC:$src1, addr:$src2)>;
5498  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5499                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5500                                            (load addr:$src2)))),
5501                   _.ImmAllZerosV)),
5502            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5503             addr:$src2)>;
5504}
5505
// Broadcast-operand counterpart of avx512_logical_lowering: matches a
// vselect_mask of type _.VT over a bitconverted logic op of type IntInfo.VT
// whose second operand is loaded through IntInfo.BroadcastLdFrag, and selects
// the "rmbk" (merge-masked, passthrough $src0) or "rmbkz" (zero-masked,
// _.ImmAllZerosV) form of InstrStr.
5506multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5507                                         X86VectorVTInfo _,
5508                                         X86VectorVTInfo IntInfo> {
5509  // Register-broadcast logical operations.
5510  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5511                   (bitconvert
5512                    (IntInfo.VT (OpNode _.RC:$src1,
5513                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5514                   _.RC:$src0)),
5515            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5516             _.RC:$src1, addr:$src2)>;
5517  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5518                   (bitconvert
5519                    (IntInfo.VT (OpNode _.RC:$src1,
5520                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5521                   _.ImmAllZerosV)),
5522            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5523             _.RC:$src1, addr:$src2)>;
5524}
5525
// Instantiates avx512_logical_lowering for all three vector widths, appending
// "Z128", "Z256" or "Z" to InstrStr and picking the matching info128/256/512
// members of SelectInfo and IntInfo. The 128/256-bit patterns require HasVLX;
// the 512-bit patterns require HasAVX512.
5526multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5527                                         AVX512VLVectorVTInfo SelectInfo,
5528                                         AVX512VLVectorVTInfo IntInfo> {
5529let Predicates = [HasVLX] in {
5530  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5531                                 IntInfo.info128>;
5532  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5533                                 IntInfo.info256>;
5534}
5535let Predicates = [HasAVX512] in {
5536  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5537                                 IntInfo.info512>;
5538}
5539}
5540
// Instantiates avx512_logical_lowering_bcast for all three vector widths,
// appending "Z128", "Z256" or "Z" to InstrStr and picking the matching
// info128/256/512 members. The 128/256-bit patterns require HasVLX; the
// 512-bit patterns require HasAVX512.
5541multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5542                                               AVX512VLVectorVTInfo SelectInfo,
5543                                               AVX512VLVectorVTInfo IntInfo> {
5544let Predicates = [HasVLX] in {
5545  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5546                                       SelectInfo.info128, IntInfo.info128>;
5547  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5548                                       SelectInfo.info256, IntInfo.info256>;
5549}
5550let Predicates = [HasAVX512] in {
5551  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5552                                       SelectInfo.info512, IntInfo.info512>;
5553}
5554}
5555
// Emits the masked-logic lowering patterns for one logic operation across
// every select-type / logic-type combination listed below. The instruction
// suffix follows the select type: "Q" for i64 and f64 selects, "D" for i32 and
// f32 selects. Integer selects omit the combination where both types match;
// floating-point selects cover all four integer logic types.
5556multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5557  // i64 vselect with i32/i16/i8 logic op
5558  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5559                                       avx512vl_i32_info>;
5560  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5561                                       avx512vl_i16_info>;
5562  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5563                                       avx512vl_i8_info>;
5564
5565  // i32 vselect with i64/i16/i8 logic op
5566  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5567                                       avx512vl_i64_info>;
5568  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5569                                       avx512vl_i16_info>;
5570  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5571                                       avx512vl_i8_info>;
5572
5573  // f32 vselect with i64/i32/i16/i8 logic op
5574  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5575                                       avx512vl_i64_info>;
5576  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5577                                       avx512vl_i32_info>;
5578  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5579                                       avx512vl_i16_info>;
5580  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5581                                       avx512vl_i8_info>;
5582
5583  // f64 vselect with i64/i32/i16/i8 logic op
5584  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5585                                       avx512vl_i64_info>;
5586  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5587                                       avx512vl_i32_info>;
5588  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5589                                       avx512vl_i16_info>;
5590  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5591                                       avx512vl_i8_info>;
5592
  // Broadcast-load patterns are emitted only for f32 select over i32 logic
  // and f64 select over i64 logic.
5593  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5594                                             avx512vl_f32_info,
5595                                             avx512vl_i32_info>;
5596  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5597                                             avx512vl_f64_info,
5598                                             avx512vl_i64_info>;
5599}
5600
// Emit the mismatched-type masked lowering patterns for each of the four
// logic instruction families.
5601defm : avx512_logical_lowering_types<"VPAND", and>;
5602defm : avx512_logical_lowering_types<"VPOR",  or>;
5603defm : avx512_logical_lowering_types<"VPXOR", xor>;
5604defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5605
5606//===----------------------------------------------------------------------===//
5607// AVX-512  FP arithmetic
5608//===----------------------------------------------------------------------===//
5609
// Defines the four basic forms of a scalar FP binary instruction.
//   rr_Int / rm_Int - maskable forms on the vector register class _.RC,
//                     matching VecNode; the memory form loads through
//                     _.ScalarIntMemFrags.
//   rr / rm         - codegen-only forms on the scalar register class _.FRC,
//                     matching OpNode and gated on HasAVX512; the memory form
//                     loads through _.ScalarLdFrag.
// Every form uses _.ExeDomain, lists MXCSR in Uses and sets
// mayRaiseFPException. IsCommutable is applied only to the FRC rr form.
5610multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5611                            SDPatternOperator OpNode, SDNode VecNode,
5612                            X86FoldableSchedWrite sched, bit IsCommutable> {
5613  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5614  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5615                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5616                           "$src2, $src1", "$src1, $src2",
5617                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5618                           Sched<[sched]>;
5619
5620  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5621                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5622                         "$src2, $src1", "$src1, $src2",
5623                         (_.VT (VecNode _.RC:$src1,
5624                                        (_.ScalarIntMemFrags addr:$src2)))>,
5625                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5626  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5627  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5628                         (ins _.FRC:$src1, _.FRC:$src2),
5629                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5630                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5631                          Sched<[sched]> {
5632    let isCommutable = IsCommutable;
5633  }
5634  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5635                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5636                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5637                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5638                         (_.ScalarLdFrag addr:$src2)))]>,
5639                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5640  }
5641  }
5642}
5643
// Defines the explicit-rounding register form ("rrb_Int") of a scalar FP
// binary instruction. It takes an extra AVX512RC:$rc operand, matches VecNode
// with that operand as an i32 target immediate, and is tagged EVEX_B and
// EVEX_RC. MXCSR is listed in Uses; mayRaiseFPException is not set in this
// multiclass.
5644multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5645                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5646  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5647  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5648                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5649                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5650                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5651                          (i32 timm:$rc))>,
5652                          EVEX_B, EVEX_RC, Sched<[sched]>;
5653}
// Defines a scalar FP binary instruction together with its {sae} form.
//   rr_Int / rm_Int - maskable forms on _.RC matching VecNode, tagged
//                     SIMD_EXC.
//   rr / rm         - codegen-only forms on _.FRC matching OpNode, gated on
//                     HasAVX512, with MXCSR in Uses and mayRaiseFPException
//                     set. Each carries an EVEX2VEXOverride built from
//                     EVEX2VexOvrd plus "rr" / "rm".
//   rrb_Int         - register form printed with "{sae}", matching SaeNode,
//                     tagged EVEX_B; MXCSR is listed in Uses but
//                     mayRaiseFPException is not set for it here.
// IsCommutable is applied only to the FRC rr form.
5654multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5655                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5656                                X86FoldableSchedWrite sched, bit IsCommutable,
5657                                string EVEX2VexOvrd> {
5658  let ExeDomain = _.ExeDomain in {
5659  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5660                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5661                           "$src2, $src1", "$src1, $src2",
5662                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5663                           Sched<[sched]>, SIMD_EXC;
5664
5665  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5666                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5667                         "$src2, $src1", "$src1, $src2",
5668                         (_.VT (VecNode _.RC:$src1,
5669                                        (_.ScalarIntMemFrags addr:$src2)))>,
5670                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5671
5672  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5673      Uses = [MXCSR], mayRaiseFPException = 1 in {
5674  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5675                         (ins _.FRC:$src1, _.FRC:$src2),
5676                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5677                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5678                          Sched<[sched]>,
5679                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5680    let isCommutable = IsCommutable;
5681  }
5682  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5683                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5684                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5685                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5686                         (_.ScalarLdFrag addr:$src2)))]>,
5687                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5688                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5689  }
5690
5691  let Uses = [MXCSR] in
5692  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5693                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5694                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5695                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5696                            EVEX_B, Sched<[sched]>;
5697  }
5698}
5699
// Instantiates a scalar FP binary op with explicit-rounding support for three
// element types: SSZ ("ss", f32x_info), SDZ ("sd", f64x_info) and SHZ ("sh",
// f16x_info). Each combines avx512_fp_scalar (OpNode / VecNode) with
// avx512_fp_scalar_round (RndNode) and uses the matching sched.PS/PD/PH .Scl
// scheduling class. The SHZ form is additionally gated on HasFP16.
5700multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5701                                SDNode VecNode, SDNode RndNode,
5702                                X86SchedWriteSizes sched, bit IsCommutable> {
5703  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5704                              sched.PS.Scl, IsCommutable>,
5705             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5706                              sched.PS.Scl>,
5707                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5708  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5709                              sched.PD.Scl, IsCommutable>,
5710             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5711                              sched.PD.Scl>,
5712                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5713  let Predicates = [HasFP16] in
5714    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5715                                VecNode, sched.PH.Scl, IsCommutable>,
5716               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5717                                sched.PH.Scl>,
5718                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5719}
5720
// Instantiates the scalar forms of a binary FP operation through
// avx512_fp_scalar_sae, one defm per element type: SSZ (f32x_info), SDZ
// (f64x_info) and, under the HasFP16 predicate, SHZ (f16x_info).
//
// OpNode, VecNode, SaeNode and IsCommutable are forwarded unchanged. The last
// argument is NAME suffixed with "SS" / "SD" / "SH".
// NOTE(review): that string is presumably the EVEX2VEX override name prefix
// consumed by avx512_fp_scalar_sae - confirm against its parameter list.
// The SHZ form is additionally tagged NotEVEX2VEXConvertible.
5721multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5722                              SDNode VecNode, SDNode SaeNode,
5723                              X86SchedWriteSizes sched, bit IsCommutable> {
5724  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5725                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5726                              NAME#"SS">,
5727                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5728  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5729                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5730                              NAME#"SD">,
5731                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5732  let Predicates = [HasFP16] in {
5733    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5734                                VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5735                                NAME#"SH">,
5736                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5737                                NotEVEX2VEXConvertible;
5738  }
5739}
// Scalar FP arithmetic instruction families.
//   * VADD/VMUL/VSUB/VDIV use avx512_binop_s_round (embedded-rounding variant
//     selected via the X86f*Rnds node).
//   * VMIN/VMAX use avx512_binop_s_sae (SAE variant selected via the
//     X86f*SAEs node).
// The trailing bit is IsCommutable: 1 for VADD and VMUL, 0 for the rest.
5740defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5741                                 SchedWriteFAddSizes, 1>;
5742defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5743                                 SchedWriteFMulSizes, 1>;
5744defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5745                                 SchedWriteFAddSizes, 0>;
5746defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5747                                 SchedWriteFDivSizes, 0>;
5748defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5749                               SchedWriteFCmpSizes, 0>;
5750defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5751                               SchedWriteFCmpSizes, 0>;
5752
5753// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5754// X86fminc and X86fmaxc instead of X86fmin and X86fmax
//
// Defines two codegen-only (isCodeGenOnly = 1) scalar forms operating on the
// scalar FP register class _.FRC, both predicated on HasAVX512:
//   * rr - register/register; marked isCommutable.
//   * rm - second operand is a scalar load (_.ScalarLdFrag) from
//          _.ScalarMemOp; scheduled with sched.Folded / sched.ReadAfterFold.
// Each form carries an EVEX2VEXOverride whose name is EVEX2VEXOvrd suffixed
// with "rr" or "rm" respectively.
5755multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5756                                    X86VectorVTInfo _, SDNode OpNode,
5757                                    X86FoldableSchedWrite sched,
5758                                    string EVEX2VEXOvrd> {
5759  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5760  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5761                         (ins _.FRC:$src1, _.FRC:$src2),
5762                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5763                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5764                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5765    let isCommutable = 1;
5766  }
5767  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5768                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5769                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5770                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5771                         (_.ScalarLdFrag addr:$src2)))]>,
5772                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5773                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5774  }
5775}
// Commutable scalar min/max instances. They share the opcodes (0x5D for min,
// 0x5F for max) and mnemonics of the scalar VMIN/VMAX families, but select
// the X86fminc / X86fmaxc nodes. All are tagged SIMD_EXC and use
// SchedWriteFCmp.Scl. The f16 ("sh") variants are additionally marked
// NotEVEX2VEXConvertible.
5776defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5777                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5778                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5779
5780defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5781                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5782                                         VEX_W, EVEX_4V, VEX_LIG,
5783                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5784
5785defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5786                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5787                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5788
5789defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5790                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5791                                         VEX_W, EVEX_4V, VEX_LIG,
5792                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5793
5794defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5795                                         SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5796                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5797                                         NotEVEX2VEXConvertible;
5798defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5799                                         SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5800                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5801                                         NotEVEX2VEXConvertible;
5802
// Packed binary FP operation for a single vector type `_`, built on
// AVX512_maskable_split. Three operand forms are defined:
//   * rr  - register/register.
//   * rm  - second operand is a full-vector load (_.LdFrag) from _.MemOp.
//   * rmb - second operand is a broadcast load (_.BroadcastLdFrag) from
//           _.ScalarMemOp; tagged EVEX_B and printed with _.BroadcastStr.
//
// OpNode and MaskOpNode are passed as two separate patterns to
// AVX512_maskable_split.
// NOTE(review): presumably OpNode is for the unmasked form and MaskOpNode for
// the masked forms - confirm against AVX512_maskable_split.
//
// Parameters:
//   IsCommutable / IsKCommutable - forwarded to the rr form only
//                                  (IsKCommutable is passed twice).
//   suffix              - appended to OpcodeStr; defaults to _.Suffix.
//   ClobberConstraint   - forwarded to every form.
//   MayRaiseFPException - value assigned to mayRaiseFPException for all forms.
//
// All forms are defined with Uses = [MXCSR] and hasSideEffects = 0; the two
// memory forms are additionally marked mayLoad = 1.
5803multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5804                            SDPatternOperator MaskOpNode,
5805                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5806                            bit IsCommutable,
5807                            bit IsKCommutable = IsCommutable,
5808                            string suffix = _.Suffix,
5809                            string ClobberConstraint = "",
5810                            bit MayRaiseFPException = 1> {
5811  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5812      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5813  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5814                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5815                                 "$src2, $src1", "$src1, $src2",
5816                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5817                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5818                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
5819  let mayLoad = 1 in {
5820    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5821                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5822                                   "$src2, $src1", "$src1, $src2",
5823                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5824                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5825                                   ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5826    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5827                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5828                                    "${src2}"#_.BroadcastStr#", $src1",
5829                                    "$src1, ${src2}"#_.BroadcastStr,
5830                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5831                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5832                                    ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5833    }
5834  }
5835}
5836
// Packed register/register form with an explicit rounding-control operand.
// Defines a single `rrb` record via AVX512_maskable whose inputs are two
// vector registers plus AVX512RC:$rc; the pattern matches OpNodeRnd with the
// rounding mode as (i32 timm:$rc). The record is tagged EVEX_B and EVEX_RC
// and is defined with Uses = [MXCSR].
//   suffix            - appended to OpcodeStr; defaults to _.Suffix.
//   ClobberConstraint - forwarded to AVX512_maskable.
5837multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5838                                  SDPatternOperator OpNodeRnd,
5839                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5840                                  string suffix = _.Suffix,
5841                                  string ClobberConstraint = ""> {
5842  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5843  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5844                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5845                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5846                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5847                  0, 0, 0, vselect_mask, ClobberConstraint>,
5848                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5849}
5850
// Packed register/register form printed with the "{sae}" modifier.
// Defines a single `rrb` record via AVX512_maskable taking two vector
// registers (no rounding-control operand) and matching OpNodeSAE. The record
// is tagged EVEX_B and is defined with Uses = [MXCSR]. The mnemonic is
// OpcodeStr followed by _.Suffix.
5851multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5852                                SDPatternOperator OpNodeSAE,
5853                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5854  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5855  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5856                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5857                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5858                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5859                  EVEX_4V, EVEX_B, Sched<[sched]>;
5860}
5861
// Instantiates avx512_fp_packed for every f32/f64 vector width:
//   * PSZ / PDZ (v16f32, v8f64)                       - require `prd`.
//   * PSZ128 / PSZ256 / PDZ128 / PDZ256 (x_info types) - require `prd` and
//     HasVLX.
// Scheduling entries come from sched.PS / sched.PD at the matching width.
//
// IsPD128Commutable (defaults to IsCommutable) only affects PDZ128: there it
// is passed in the IsCommutable position of avx512_fp_packed while
// IsCommutable is passed in the IsKCommutable position. Every other width
// passes IsCommutable alone.
5862multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5863                             SDPatternOperator MaskOpNode,
5864                             Predicate prd, X86SchedWriteSizes sched,
5865                             bit IsCommutable = 0,
5866                             bit IsPD128Commutable = IsCommutable> {
5867  let Predicates = [prd] in {
5868  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5869                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5870                              EVEX_CD8<32, CD8VF>;
5871  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5872                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5873                              EVEX_CD8<64, CD8VF>;
5874  }
5875
5876    // Define only if AVX512VL feature is present.
5877  let Predicates = [prd, HasVLX] in {
5878    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5879                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5880                                   EVEX_CD8<32, CD8VF>;
5881    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5882                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5883                                   EVEX_CD8<32, CD8VF>;
5884    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5885                                   sched.PD.XMM, IsPD128Commutable,
5886                                   IsCommutable>, EVEX_V128, PD, VEX_W,
5887                                   EVEX_CD8<64, CD8VF>;
5888    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5889                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5890                                   EVEX_CD8<64, CD8VF>;
5891  }
5892}
5893
// Half-precision counterpart of the packed binop instantiation: defines
// avx512_fp_packed for the f16 vector types.
//   * PHZ (v32f16)                    - requires HasFP16.
//   * PHZ128 / PHZ256 (v8f16x/v16f16x) - require HasVLX and HasFP16.
// All forms use the T_MAP5PS prefix class, EVEX_CD8<16, CD8VF> and the
// sched.PH scheduling entries.
5894multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5895                              SDPatternOperator MaskOpNode,
5896                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5897  let Predicates = [HasFP16] in {
5898    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5899                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5900                                EVEX_CD8<16, CD8VF>;
5901  }
5902  let Predicates = [HasVLX, HasFP16] in {
5903    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5904                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5905                                   EVEX_CD8<16, CD8VF>;
5906    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5907                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5908                                   EVEX_CD8<16, CD8VF>;
5909  }
5910}
5911
// Instantiates the rounding-control form (avx512_fp_round_packed) for the
// 512-bit vector types only: PHZ (v32f16, requires HasFP16), PSZ (v16f32)
// and PDZ (v8f64). No 128/256-bit variants are defined here. The whole
// multiclass is wrapped in `let Uses = [MXCSR]`.
5912let Uses = [MXCSR] in
5913multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5914                                   X86SchedWriteSizes sched> {
5915  let Predicates = [HasFP16] in {
5916    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5917                                      v32f16_info>,
5918                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5919  }
5920  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5921                                    v16f32_info>,
5922                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5923  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5924                                    v8f64_info>,
5925                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5926}
5927
// Instantiates the "{sae}" form (avx512_fp_sae_packed) for the 512-bit vector
// types only: PHZ (v32f16, requires HasFP16), PSZ (v16f32) and PDZ (v8f64).
// Note that the node parameter is named OpNodeRnd here even though it is
// passed as the OpNodeSAE argument of avx512_fp_sae_packed. The whole
// multiclass is wrapped in `let Uses = [MXCSR]`.
5928let Uses = [MXCSR] in
5929multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5930                                 X86SchedWriteSizes sched> {
5931  let Predicates = [HasFP16] in {
5932    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5933                                    v32f16_info>,
5934                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5935  }
5936  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5937                                  v16f32_info>,
5938                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5939  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5940                                  v8f64_info>,
5941                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5942}
5943
// Packed FP arithmetic instruction families. Each arithmetic defm combines:
//   * avx512_fp_binop_p  - f32/f64 forms, predicated on HasAVX512,
//   * avx512_fp_binop_ph - f16 forms,
//   * avx512_fp_binop_p_round (VADD/VMUL/VSUB/VDIV) or
//     avx512_fp_binop_p_sae (VMIN/VMAX) - 512-bit rounding / SAE forms.
// VADD and VMUL pass IsCommutable = 1; VSUB and VDIV rely on the default of 0;
// VMIN and VMAX pass 0 explicitly.
5944defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5945                              SchedWriteFAddSizes, 1>,
5946            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5947            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5948defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5949                              SchedWriteFMulSizes, 1>,
5950            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5951            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5952defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5953                              SchedWriteFAddSizes>,
5954            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5955            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5956defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5957                              SchedWriteFDivSizes>,
5958            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5959            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5960defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5961                              SchedWriteFCmpSizes, 0>,
5962            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5963            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5964defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5965                              SchedWriteFCmpSizes, 0>,
5966            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5967            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Codegen-only commutable min/max: same opcodes and mnemonics as VMIN/VMAX,
// but selecting X86fminc / X86fmaxc with IsCommutable = 1. No SAE forms are
// attached to these.
5968let isCodeGenOnly = 1 in {
5969  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5970                                 SchedWriteFCmpSizes, 1>,
5971               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5972                                 SchedWriteFCmpSizes, 1>;
5973  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5974                                 SchedWriteFCmpSizes, 1>,
5975               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5976                                 SchedWriteFCmpSizes, 1>;
5977}
// FP-domain logic instructions, predicated on HasDQI. They are defined with
// null_frag for both pattern operators (no selection patterns here) and are
// wrapped in a `let` that sets Uses to the empty register list and
// mayRaiseFPException to 0.
// NOTE(review): presumably this marks them as not reading MXCSR and not
// raising FP exceptions, unlike the arithmetic families - confirm.
// VANDN is the only one declared non-commutable.
5978let Uses = []<Register>, mayRaiseFPException = 0 in {
5979defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5980                               SchedWriteFLogicSizes, 1>;
5981defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5982                               SchedWriteFLogicSizes, 0>;
5983defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5984                               SchedWriteFLogicSizes, 1>;
5985defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5986                               SchedWriteFLogicSizes, 1>;
5987}
5988
// Packed forms of a two-operand FP node for one vector type `_`, built on
// AVX512_maskable:
//   * rr  - register/register.
//   * rm  - second operand is a full-vector load (_.LdFrag) from _.MemOp.
//   * rmb - second operand is a broadcast load (_.BroadcastLdFrag) from
//           _.ScalarMemOp; tagged EVEX_B and printed with _.BroadcastStr.
// All forms are defined with Uses = [MXCSR] and mayRaiseFPException = 1. The
// mnemonic is OpcodeStr followed by _.Suffix.
5989multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5990                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5991  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5992  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5993                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5994                  "$src2, $src1", "$src1, $src2",
5995                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5996                  EVEX_4V, Sched<[sched]>;
5997  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5998                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5999                  "$src2, $src1", "$src1, $src2",
6000                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
6001                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6002  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6003                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
6004                   "${src2}"#_.BroadcastStr#", $src1",
6005                   "$src1, ${src2}"#_.BroadcastStr,
6006                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
6007                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6008  }
6009}
6010
// Scalar forms of a two-operand FP node, built on AVX512_maskable_scalar and
// operating on the vector register class _.RC:
//   * rr - register/register.
//   * rm - second operand comes from _.IntScalarMemOp and is matched with
//          _.ScalarIntMemFrags.
// Both forms are defined with Uses = [MXCSR] and mayRaiseFPException = 1.
// Unlike the packed variant, no EVEX_4V tag is attached here; users of this
// multiclass add it themselves.
6011multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
6012                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6013  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
6014  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6015                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
6016                  "$src2, $src1", "$src1, $src2",
6017                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
6018                  Sched<[sched]>;
6019  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6020                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
6021                  "$src2, $src1", "$src1, $src2",
6022                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
6023                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6024  }
6025}
6026
// Instantiates every scalef form. `opc` is used for the packed records and
// `opcScaler` for the scalar records.
//
//   512-bit packed (PHZ/PSZ/PDZ): avx512_fp_scalef_p with X86scalef plus
//       avx512_fp_round_packed with X86scalefRnd.
//   Scalar (SHZ/SSZ/SDZ): avx512_fp_scalef_scalar with X86scalefs plus
//       avx512_fp_scalar_round with X86scalefsRnd. The rounding multiclass is
//       given OpcodeStr with an explicit "sh"/"ss"/"sd" suffix, whereas the
//       non-rounding one receives plain OpcodeStr and appends _.Suffix itself.
//   128/256-bit packed: avx512_fp_scalef_p only (no rounding form).
//
// Predicates: f16 records require HasFP16 (plus HasVLX for PHZ128/PHZ256);
// f32/f64 128/256-bit records require HasVLX. The f16 records use the
// T_MAP6PD prefix class, the others T8PD.
// NOTE(review): SSZ and SDZ carry VEX_LIG but SHZ does not - confirm whether
// that difference is intentional.
6027multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
6028                                X86SchedWriteWidths sched> {
6029  let Predicates = [HasFP16] in {
6030    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
6031               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
6032                                EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
6033    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
6034               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
6035                             EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
6036  }
6037  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
6038             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
6039                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
6040  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
6041             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
6042                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
6043  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
6044             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
6045                                    X86scalefsRnd, sched.Scl>,
6046                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
6047  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
6048             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
6049                                    X86scalefsRnd, sched.Scl>,
6050                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
6051
6052  // Define only if AVX512VL feature is present.
6053  let Predicates = [HasVLX] in {
6054    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
6055                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
6056    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
6057                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
6058    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
6059                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
6060    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
6061                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
6062  }
6063
6064  let Predicates = [HasFP16, HasVLX] in {
6065    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6066                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6067    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6068                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6069  }
6070}
// The VSCALEF family: packed opcode 0x2C, scalar opcode 0x2D, scheduled with
// SchedWriteFAdd. Every record is tagged NotEVEX2VEXConvertible.
6071defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6072                                    SchedWriteFAdd>, NotEVEX2VEXConvertible;
6073
6074//===----------------------------------------------------------------------===//
6075// AVX-512  VPTESTM instructions
6076//===----------------------------------------------------------------------===//
6077
// Register and full-vector-memory forms of a vector test instruction that
// writes a mask register (_.KRC), built on AVX512_maskable_cmp:
//   * rr - register/register; a trailing 1 is passed to AVX512_maskable_cmp.
//          NOTE(review): presumably the IsCommutable flag - confirm.
//   * rm - second operand from _.MemOp; marked mayLoad and tagged
//          EVEX_CD8<_.EltSize, CD8VF>.
// Both forms use null_frag patterns and hasSideEffects = 0.
6078multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6079                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6080  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6081  // There are just too many permutations due to commutability and bitcasts.
6082  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6083  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6084                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6085                      "$src2, $src1", "$src1, $src2",
6086                   (null_frag), (null_frag), 1>,
6087                   EVEX_4V, Sched<[sched]>;
6088  let mayLoad = 1 in
6089  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6090                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6091                       "$src2, $src1", "$src1, $src2",
6092                   (null_frag), (null_frag)>,
6093                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6094                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6095  }
6096}
6097
// Broadcast-memory form (`rmb`) of a vector test instruction writing a mask
// register (_.KRC). The second operand comes from _.ScalarMemOp and is printed
// with _.BroadcastStr; the record is tagged EVEX_B and
// EVEX_CD8<_.EltSize, CD8VF>, marked mayLoad with hasSideEffects = 0, and
// uses null_frag patterns (no selection pattern is attached here).
6098multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6099                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6100  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6101  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6102                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6103                    "${src2}"#_.BroadcastStr#", $src1",
6104                    "$src1, ${src2}"#_.BroadcastStr,
6105                    (null_frag), (null_frag)>,
6106                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6107                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6108}
6109
// Instantiates the vptest forms (avx512_vptest plus the broadcast form
// avx512_vptest_mb) for all three vector lengths described by `_`:
//   * Z (info512)                    - requires HasAVX512.
//   * Z256 / Z128 (info256/info128)  - require HasAVX512 and HasVLX.
6110multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6111                                  X86SchedWriteWidths sched,
6112                                  AVX512VLVectorVTInfo _> {
6113  let Predicates  = [HasAVX512] in
6114  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
6115           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6116
6117  let Predicates = [HasAVX512, HasVLX] in {
6118  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
6119              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6120  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
6121              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6122  }
6123}
6124
// Dword and qword variants of a vptest instruction: D uses avx512vl_i32_info
// with mnemonic suffix "d"; Q uses avx512vl_i64_info with suffix "q" and is
// tagged VEX_W.
6125multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6126                            X86SchedWriteWidths sched> {
6127  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6128                                 avx512vl_i32_info>;
6129  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6130                                 avx512vl_i64_info>, VEX_W;
6131}
6132
// Word and byte variants of a vptest instruction. Only avx512_vptest is
// instantiated (register and full-vector memory forms); no broadcast form is
// defined for these element sizes.
//   * WZ / BZ (512-bit)              - require HasBWI.
//   * 256- and 128-bit variants      - require HasVLX and HasBWI.
// Word variants are tagged VEX_W; byte variants are not.
6133multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6134                            X86SchedWriteWidths sched> {
6135  let Predicates = [HasBWI] in {
6136  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6137                            v32i16_info>, EVEX_V512, VEX_W;
6138  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6139                            v64i8_info>, EVEX_V512;
6140  }
6141
6142  let Predicates = [HasVLX, HasBWI] in {
6143  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6144                            v16i16x_info>, EVEX_V256, VEX_W;
6145  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6146                            v8i16x_info>, EVEX_V128, VEX_W;
6147  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6148                            v32i8x_info>, EVEX_V256;
6149  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6150                            v16i8x_info>, EVEX_V128;
6151  }
6152}
6153
// Convenience multiclass that pulls in every element size of a vptest
// instruction: byte/word forms use opc_wb, dword/qword forms use opc_dq.
6154multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6155                                   X86SchedWriteWidths sched> :
6156  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6157  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6158
// VPTESTM and VPTESTNM share the same opcodes (0x26 for byte/word, 0x27 for
// dword/qword) and scheduling class; they differ only in the prefix class
// (T8PD vs. T8XS).
6159defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6160                                         SchedWriteVecLogic>, T8PD;
6161defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6162                                         SchedWriteVecLogic>, T8XS;
6163
6164//===----------------------------------------------------------------------===//
6165// AVX-512  Shift instructions
6166//===----------------------------------------------------------------------===//
6167
// Shift-by-immediate forms for one vector type `_`, built on AVX512_maskable:
//   * ri - vector register source, encoded with format ImmFormR.
//   * mi - vector loaded from _.MemOp (_.LdFrag), encoded with format
//          ImmFormM; scheduled with sched.Folded only.
// In both forms the shift amount is a u8imm operand matched as
// (i8 timm:$src2).
6168multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6169                            string OpcodeStr, SDNode OpNode,
6170                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6171  let ExeDomain = _.ExeDomain in {
6172  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6173                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6174                      "$src2, $src1", "$src1, $src2",
6175                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6176                   Sched<[sched]>;
6177  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6178                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6179                       "$src2, $src1", "$src1, $src2",
6180                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6181                          (i8 timm:$src2)))>,
6182                   Sched<[sched.Folded]>;
6183  }
6184}
6185
// Shift-by-immediate form whose vector source is a broadcast load: `mbi`
// reads _.ScalarMemOp via _.BroadcastLdFrag, prints the operand with
// _.BroadcastStr, and is tagged EVEX_B. The shift amount is a u8imm operand
// matched as (i8 timm:$src2).
6186multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6187                             string OpcodeStr, SDNode OpNode,
6188                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6189  let ExeDomain = _.ExeDomain in
6190  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6191                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6192      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6193     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6194     EVEX_B, Sched<[sched.Folded]>;
6195}
6196
// Shift forms whose shift amount is supplied in a vector operand of type
// SrcVT, built on AVX512_maskable:
//   * rr - amount in a VR128X register.
//   * rm - amount loaded from i128mem.
// The data operand ($src1) uses the register class of `_`. Both forms are
// tagged AVX512BIBase and EVEX_4V.
6197multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6198                            X86FoldableSchedWrite sched, ValueType SrcVT,
6199                            X86VectorVTInfo _> {
6200   // src2 is always 128-bit
6201  let ExeDomain = _.ExeDomain in {
6202  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6203                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6204                      "$src2, $src1", "$src1, $src2",
6205                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6206                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
6207  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6208                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6209                       "$src2, $src1", "$src1, $src2",
6210                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6211                   AVX512BIBase,
6212                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6213  }
6214}
6215
// Instantiates avx512_shift_rrm for all three vector lengths of VTInfo:
//   * Z (info512)                   - requires `prd`.
//   * Z256 / Z128 (info256/info128) - require `prd` and HasVLX.
// The EVEX_CD8 tuple type changes with the vector length: CD8VQ for Z, CD8VH
// for Z256 and CD8VF for Z128.
// NOTE(review): presumably this is because the memory operand is always 128
// bits wide regardless of the vector length - confirm.
6216multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6217                              X86SchedWriteWidths sched, ValueType SrcVT,
6218                              AVX512VLVectorVTInfo VTInfo,
6219                              Predicate prd> {
6220  let Predicates = [prd] in
6221  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6222                               VTInfo.info512>, EVEX_V512,
6223                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6224  let Predicates = [prd, HasVLX] in {
6225  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6226                               VTInfo.info256>, EVEX_V256,
6227                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6228  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6229                               VTInfo.info128>, EVEX_V128,
6230                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6231  }
6232}
6233
// Instantiates avx512_shift_sizes for the three element sizes, each with its
// own opcode and 128-bit shift-amount type:
//   * D - opcd, v4i32 amount, avx512vl_i32_info, requires HasAVX512.
//   * Q - opcq, v2i64 amount, avx512vl_i64_info, requires HasAVX512; tagged
//         VEX_W, with notEVEX2VEXConvertible set from NotEVEX2VEXConvertibleQ.
//   * W - opcw, v8i16 amount, avx512vl_i16_info, requires HasBWI.
6234multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6235                              string OpcodeStr, SDNode OpNode,
6236                              X86SchedWriteWidths sched,
6237                              bit NotEVEX2VEXConvertibleQ = 0> {
6238  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6239                              avx512vl_i32_info, HasAVX512>;
6240  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6241  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6242                              avx512vl_i64_info, HasAVX512>, VEX_W;
6243  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6244                              avx512vl_i16_info, HasBWI>;
6245}
6246
// Instantiates avx512_shift_rmi together with avx512_shift_rmbi (both defined
// earlier in this file) at three vector widths: Z, Z256 and Z128.
//   ImmFormR - passed to avx512_shift_rmi only.
//   ImmFormM - passed to both avx512_shift_rmi and avx512_shift_rmbi.
//   sched    - per-width scheduling info (.ZMM / .YMM / .XMM).
//   VTInfo   - supplies info512 / info256 / info128.
// Z is predicated on HasAVX512; Z256 and Z128 additionally require HasVLX.
// No EVEX_CD8 or prefix class is applied here; the users visible in this file
// add those at the defm site.
6247multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6248                                  string OpcodeStr, SDNode OpNode,
6249                                  X86SchedWriteWidths sched,
6250                                  AVX512VLVectorVTInfo VTInfo> {
6251  let Predicates = [HasAVX512] in
6252  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6253                              sched.ZMM, VTInfo.info512>,
6254             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6255                               VTInfo.info512>, EVEX_V512;
6256  let Predicates = [HasAVX512, HasVLX] in {
6257  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6258                              sched.YMM, VTInfo.info256>,
6259             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6260                               VTInfo.info256>, EVEX_V256;
6261  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6262                              sched.XMM, VTInfo.info128>,
6263             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6264                               VTInfo.info128>, EVEX_V128;
6265  }
6266}
6267
// Word-element counterpart of avx512_shift_rmi_sizes: instantiates
// avx512_shift_rmi for v32i16_info (WZ), v16i16x_info (WZ256) and v8i16x_info
// (WZ128). Unlike avx512_shift_rmi_sizes it does not instantiate
// avx512_shift_rmbi. WZ requires HasBWI; WZ256 and WZ128 require HasVLX and
// HasBWI. Every width is marked VEX_WIG.
6268multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6269                              string OpcodeStr, SDNode OpNode,
6270                              X86SchedWriteWidths sched> {
6271  let Predicates = [HasBWI] in
6272  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6273                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
6274  let Predicates = [HasVLX, HasBWI] in {
6275  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6276                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
6277  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6278                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
6279  }
6280}
6281
// Instantiates avx512_shift_rmi_sizes for dword (D) and qword (Q) elements.
//   D - opcode opcd, mnemonic suffix "d", avx512vl_i32_info, EVEX_CD8<32, CD8VF>.
//   Q - opcode opcq, mnemonic suffix "q", avx512vl_i64_info, EVEX_CD8<64, CD8VF>,
//       plus VEX_W; its notEVEX2VEXConvertible field is set from
//       NotEVEX2VEXConvertibleQ (default 0).
// The same ImmFormR / ImmFormM pair is used for both element sizes.
6282multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6283                               Format ImmFormR, Format ImmFormM,
6284                               string OpcodeStr, SDNode OpNode,
6285                               X86SchedWriteWidths sched,
6286                               bit NotEVEX2VEXConvertibleQ = 0> {
6287  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6288                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6289  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6290  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6291                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
6292}
6293
// Shift/rotate definitions built from avx512_shift_rmi_dq (D/Q elements) and,
// for the three shifts, avx512_shift_rmi_w (W elements). All of them use
// SchedWriteVecShiftImm, AVX512BIi8Base and EVEX_4V.
// The D/Q forms use primary opcodes 0x72/0x73 and the W forms 0x71; operations
// that share an opcode are told apart by their MRMnr/MRMnm format arguments
// (MRM2 = vpsrl, MRM6 = vpsll, MRM4 = vpsra, MRM0 = vpror, MRM1 = vprol).
6294defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6295                                 SchedWriteVecShiftImm>,
6296             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6297                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6298
6299defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6300                                 SchedWriteVecShiftImm>,
6301             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6302                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6303
// VPSRA uses 0x72 for both D and Q and passes 1 for NotEVEX2VEXConvertibleQ,
// so its Q forms get notEVEX2VEXConvertible = 1.
6304defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6305                                 SchedWriteVecShiftImm, 1>,
6306             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6307                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6308
// The rotates have D and Q forms only (no avx512_shift_rmi_w instantiation),
// both on opcode 0x72.
6309defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6310                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6311defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6312                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6313
// Shift definitions built from avx512_shift_types; the three opcode arguments
// are the D, Q and W opcodes in that order. These reuse the VPSLL/VPSRA/VPSRL
// defm prefixes of the definitions above but with the X86vshl/X86vsra/X86vsrl
// nodes and SchedWriteVecShift. VPSRA uses the same opcode (0xE2) for D and Q
// and passes 1 for NotEVEX2VEXConvertibleQ.
6314defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6315                                SchedWriteVecShift>;
6316defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6317                                SchedWriteVecShift, 1>;
6318defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6319                                SchedWriteVecShift>;
6320
6321// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the narrow source in the low subregister (sub_xmm or
// sub_ymm) of an IMPLICIT_DEF v8i64, applies the 512-bit instruction, and
// extracts the same subregister from the result.
6322let Predicates = [HasAVX512, NoVLX] in {
  // X86vsra: the count operand $src2 is passed to VPSRAQZrr as VR128X, without
  // widening.
6323  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6324            (EXTRACT_SUBREG (v8i64
6325              (VPSRAQZrr
6326                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6327                 VR128X:$src2)), sub_ymm)>;
6328
6329  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6330            (EXTRACT_SUBREG (v8i64
6331              (VPSRAQZrr
6332                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6333                 VR128X:$src2)), sub_xmm)>;
6334
  // X86vsrai: the i8 timm count is forwarded to VPSRAQZri.
6335  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6336            (EXTRACT_SUBREG (v8i64
6337              (VPSRAQZri
6338                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6339                 timm:$src2)), sub_ymm)>;
6340
6341  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6342            (EXTRACT_SUBREG (v8i64
6343              (VPSRAQZri
6344                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6345                 timm:$src2)), sub_xmm)>;
6346}
6347
6348//===-------------------------------------------------------------------===//
6349// Variable Bit Shifts
6350//===-------------------------------------------------------------------===//
6351
// Defines the register-register (rr) and register-memory (rm) forms of a
// two-source vector operation via AVX512_maskable, both under
// ExeDomain = _.ExeDomain with AVX5128IBase and EVEX_4V.
//   rr - both sources are _.RC registers; scheduled with `sched`.
//   rm - $src2 is a _.MemOp loaded with _.LdFrag; adds
//        EVEX_CD8<_.EltSize, CD8VF> and is scheduled with
//        sched.Folded / sched.ReadAfterFold.
// In both patterns OpNode takes $src1 first and $src2 second.
// This multiclass is also reused for non-shift operations later in the file
// (see avx512_vperm_dq_sizes, avx512_vperm_bw and avx512_pshufb_sizes).
6352multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6353                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6354  let ExeDomain = _.ExeDomain in {
6355  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6356                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6357                      "$src2, $src1", "$src1, $src2",
6358                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6359                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
6360  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6361                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6362                       "$src2, $src1", "$src1, $src2",
6363                   (_.VT (OpNode _.RC:$src1,
6364                   (_.VT (_.LdFrag addr:$src2))))>,
6365                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6366                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6367  }
6368}
6369
// Defines the broadcast-memory (rmb) form that complements avx512_var_shift:
// $src2 is a _.ScalarMemOp loaded with _.BroadcastLdFrag, the assembly operand
// string has _.BroadcastStr appended to ${src2}, and the instruction carries
// EVEX_B in addition to AVX5128IBase, EVEX_4V and EVEX_CD8<_.EltSize, CD8VF>.
// Scheduled with sched.Folded / sched.ReadAfterFold.
6370multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6371                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6372  let ExeDomain = _.ExeDomain in
6373  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6374                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6375                    "${src2}"#_.BroadcastStr#", $src1",
6376                    "$src1, ${src2}"#_.BroadcastStr,
6377                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6378                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6379                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6380}
6381
// Instantiates avx512_var_shift (rr, rm) and avx512_var_shift_mb (rmb) at
// three vector widths: Z (info512, sched.ZMM, EVEX_V512), Z256 (info256,
// sched.YMM, EVEX_V256) and Z128 (info128, sched.XMM, EVEX_V128).
// Z requires HasAVX512; Z256 and Z128 additionally require HasVLX.
6382multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6383                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6384  let Predicates  = [HasAVX512] in
6385  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6386           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6387
6388  let Predicates = [HasAVX512, HasVLX] in {
6389  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6390              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6391  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6392              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6393  }
6394}
6395
// Instantiates avx512_var_shift_sizes for dword (D, suffix "d",
// avx512vl_i32_info) and qword (Q, suffix "q", avx512vl_i64_info) elements.
// Both use the same opcode `opc`; the Q forms add VEX_W.
6396multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6397                                  SDNode OpNode, X86SchedWriteWidths sched> {
6398  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6399                                 avx512vl_i32_info>;
6400  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6401                                 avx512vl_i64_info>, VEX_W;
6402}
6403
6404// Use 512bit version to implement 128/256 bit in case NoVLX.
//   _         - supplies the 128-, 256- and 512-bit type info.
//   OpcodeStr - instruction name prefix; the instruction used is the record
//               named OpcodeStr#"Zrr" (looked up with !cast<Instruction>).
//   OpNode    - the node matched on the narrow types.
//   p         - predicate list applied to both patterns.
// Both sources are placed in the low subregister of an IMPLICIT_DEF 512-bit
// value, and the result is extracted from that same subregister.
6405multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6406                                     SDNode OpNode, list<Predicate> p> {
6407  let Predicates = p in {
  // 256-bit operands via sub_ymm.
6408  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6409                                  (_.info256.VT _.info256.RC:$src2))),
6410            (EXTRACT_SUBREG
6411                (!cast<Instruction>(OpcodeStr#"Zrr")
6412                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6413                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6414             sub_ymm)>;
6415
  // 128-bit operands via sub_xmm.
6416  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6417                                  (_.info128.VT _.info128.RC:$src2))),
6418            (EXTRACT_SUBREG
6419                (!cast<Instruction>(OpcodeStr#"Zrr")
6420                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6421                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6422             sub_xmm)>;
6423  }
6424}
// Word-element instantiations of avx512_var_shift (rr and rm only; no
// avx512_var_shift_mb broadcast form): WZ (v32i16_info), WZ256 (v16i16x_info)
// and WZ128 (v8i16x_info). WZ requires HasBWI; WZ256 and WZ128 require HasVLX
// and HasBWI. Every width is marked VEX_W.
6425multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6426                              SDNode OpNode, X86SchedWriteWidths sched> {
6427  let Predicates = [HasBWI] in
6428  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6429              EVEX_V512, VEX_W;
6430  let Predicates = [HasVLX, HasBWI] in {
6431
6432  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6433              EVEX_V256, VEX_W;
6434  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6435              EVEX_V128, VEX_W;
6436  }
6437}
6438
// Per-element variable shifts. Each combines avx512_var_shift_types (D/Q
// forms, first opcode) with avx512_var_shift_w (W forms, second opcode), all
// scheduled with SchedWriteVarVecShift.
6439defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6440              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6441
6442defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6443              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6444
6445defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6446              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6447
// Variable rotates match the generic rotr/rotl nodes and have D/Q forms only.
6448defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6449defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6450
// NoVLX lowering of the 128/256-bit forms through the 512-bit instruction:
// VPSRAVQ under HasAVX512, and the three word shifts under HasBWI.
6451defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6452defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6453defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6454defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6455
6456
6457// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Every pattern places the narrow operand(s) in the low subregister (sub_xmm
// or sub_ymm) of an IMPLICIT_DEF 512-bit value, applies the 512-bit
// instruction, and extracts the same subregister from the result.
6458let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotl on qword elements -> VPROLVQZrr (both operands widened).
6459  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6460            (EXTRACT_SUBREG (v8i64
6461              (VPROLVQZrr
6462                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6463                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6464                       sub_xmm)>;
6465  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6466            (EXTRACT_SUBREG (v8i64
6467              (VPROLVQZrr
6468                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6469                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6470                       sub_ymm)>;
6471
  // Variable rotl on dword elements -> VPROLVDZrr (both operands widened).
6472  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6473            (EXTRACT_SUBREG (v16i32
6474              (VPROLVDZrr
6475                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6476                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6477                        sub_xmm)>;
6478  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6479            (EXTRACT_SUBREG (v16i32
6480              (VPROLVDZrr
6481                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6482                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6483                        sub_ymm)>;
6484
  // X86vrotli with an i8 timm count on qword elements -> VPROLQZri.
6485  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6486            (EXTRACT_SUBREG (v8i64
6487              (VPROLQZri
6488                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6489                        timm:$src2)), sub_xmm)>;
6490  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6491            (EXTRACT_SUBREG (v8i64
6492              (VPROLQZri
6493                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6494                       timm:$src2)), sub_ymm)>;
6495
  // X86vrotli with an i8 timm count on dword elements -> VPROLDZri.
6496  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6497            (EXTRACT_SUBREG (v16i32
6498              (VPROLDZri
6499                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6500                        timm:$src2)), sub_xmm)>;
6501  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6502            (EXTRACT_SUBREG (v16i32
6503              (VPROLDZri
6504                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6505                        timm:$src2)), sub_ymm)>;
6506}
6507
6508// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Every pattern places the narrow operand(s) in the low subregister (sub_xmm
// or sub_ymm) of an IMPLICIT_DEF 512-bit value, applies the 512-bit
// instruction, and extracts the same subregister from the result.
6509let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotr on qword elements -> VPRORVQZrr (both operands widened).
6510  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6511            (EXTRACT_SUBREG (v8i64
6512              (VPRORVQZrr
6513                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6514                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6515                       sub_xmm)>;
6516  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6517            (EXTRACT_SUBREG (v8i64
6518              (VPRORVQZrr
6519                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6520                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6521                       sub_ymm)>;
6522
  // Variable rotr on dword elements -> VPRORVDZrr (both operands widened).
6523  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6524            (EXTRACT_SUBREG (v16i32
6525              (VPRORVDZrr
6526                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6527                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6528                        sub_xmm)>;
6529  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6530            (EXTRACT_SUBREG (v16i32
6531              (VPRORVDZrr
6532                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6533                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6534                        sub_ymm)>;
6535
  // X86vrotri with an i8 timm count on qword elements -> VPRORQZri.
6536  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6537            (EXTRACT_SUBREG (v8i64
6538              (VPRORQZri
6539                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6540                        timm:$src2)), sub_xmm)>;
6541  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6542            (EXTRACT_SUBREG (v8i64
6543              (VPRORQZri
6544                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6545                       timm:$src2)), sub_ymm)>;
6546
  // X86vrotri with an i8 timm count on dword elements -> VPRORDZri.
6547  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6548            (EXTRACT_SUBREG (v16i32
6549              (VPRORDZri
6550                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6551                        timm:$src2)), sub_xmm)>;
6552  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6553            (EXTRACT_SUBREG (v16i32
6554              (VPRORDZri
6555                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6556                        timm:$src2)), sub_ymm)>;
6557}
6558
6559//===-------------------------------------------------------------------===//
6560// 1-src variable permutation VPERMW/D/Q
6561//===-------------------------------------------------------------------===//
6562
// Instantiates avx512_var_shift (rr, rm) and avx512_var_shift_mb (rmb) for a
// permute operation at 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 +
// HasVLX) widths. No 128-bit form is defined here. Unlike the shift size
// multiclasses, `sched` is a single X86FoldableSchedWrite used for both widths.
6563multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6564                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6565  let Predicates  = [HasAVX512] in
6566  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6567           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6568
6569  let Predicates = [HasAVX512, HasVLX] in
6570  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6571              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6572}
6573
// Instantiates avx512_shift_rmi and avx512_shift_rmbi for a permute operation
// at 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 + HasVLX) widths. No
// 128-bit form is defined here. ImmFormR goes to avx512_shift_rmi only;
// ImmFormM goes to both. `sched` is a single X86FoldableSchedWrite used for
// both widths.
6574multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6575                                 string OpcodeStr, SDNode OpNode,
6576                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6577  let Predicates = [HasAVX512] in
6578  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6579                              sched, VTInfo.info512>,
6580             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6581                               sched, VTInfo.info512>, EVEX_V512;
6582  let Predicates = [HasAVX512, HasVLX] in
6583  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6584                              sched, VTInfo.info256>,
6585             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6586                               sched, VTInfo.info256>, EVEX_V256;
6587}
6588
// Instantiates avx512_var_shift (rr and rm only; no broadcast form) for a
// permute operation at all three widths: Z (info512), Z256 (info256) and Z128
// (info128). Z requires `prd`; Z256 and Z128 require HasVLX and `prd`.
// `sched` is a single X86FoldableSchedWrite used for every width.
6589multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6590                              Predicate prd, SDNode OpNode,
6591                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6592  let Predicates = [prd] in
6593  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6594              EVEX_V512 ;
6595  let Predicates = [HasVLX, prd] in {
6596  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6597              EVEX_V256 ;
6598  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6599              EVEX_V128 ;
6600  }
6601}
6602
// Word and byte permutes share opcode 0x8D and the X86VPermv node; VPERMW is
// predicated on HasBWI and marked VEX_W, VPERMB is predicated on HasVBMI.
6603defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6604                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6605defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6606                               WriteVarShuffle256, avx512vl_i8_info>;
6607
// Dword/qword and float/double permutes via X86VPermv. Each integer/FP pair
// shares an opcode (0x36 and 0x16 respectively); the 64-bit element variants
// add VEX_W.
6608defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6609                                    WriteVarShuffle256, avx512vl_i32_info>;
6610defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6611                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6612defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6613                                     WriteFVarShuffle256, avx512vl_f32_info>;
6614defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6615                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6616
// Additional VPERMQ / VPERMPD forms matching X86VPermi, built from
// avx512_vpermi_dq_sizes with AVX512AIi8Base. These reuse the VPERMQ/VPERMPD
// defm prefixes of the definitions above.
6617defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6618                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6619                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6620defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6621                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6622                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6623
6624//===----------------------------------------------------------------------===//
6625// AVX-512 - VPERMIL
6626//===----------------------------------------------------------------------===//
6627
// Defines the rr, rm and rmb forms of a permute whose data operand and
// control operand have different vector type infos.
//   _    - type info of the data operand ($src1) and of the result.
//   Ctrl - type info of the control operand ($src2).
// All three forms use T8PD and EVEX_4V; the memory forms add
// EVEX_CD8<_.EltSize, CD8VF> and the broadcast form also adds EVEX_B.
6628multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6629                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6630                             X86VectorVTInfo Ctrl> {
  // Register control operand in Ctrl.RC.
6631  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6632                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6633                  "$src2, $src1", "$src1, $src2",
6634                  (_.VT (OpNode _.RC:$src1,
6635                               (Ctrl.VT Ctrl.RC:$src2)))>,
6636                  T8PD, EVEX_4V, Sched<[sched]>;
  // Full-vector memory control operand: Ctrl.MemOp loaded with Ctrl.LdFrag.
6637  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6638                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6639                  "$src2, $src1", "$src1, $src2",
6640                  (_.VT (OpNode
6641                           _.RC:$src1,
6642                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6643                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6644                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast memory control operand. Note the operand class and broadcast
  // string come from `_` (_.ScalarMemOp, _.BroadcastStr) while the load
  // fragment comes from `Ctrl` (Ctrl.BroadcastLdFrag).
6645  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6646                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6647                   "${src2}"#_.BroadcastStr#", $src1",
6648                   "$src1, ${src2}"#_.BroadcastStr,
6649                   (_.VT (OpNode
6650                            _.RC:$src1,
6651                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6652                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6653                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6654}
6655
// Instantiates avx512_permil_vec with the X86VPermilpv node at three vector
// widths, pairing the matching-width entries of `_` (data) and `Ctrl`
// (control). Z requires HasAVX512; Z128 and Z256 additionally require HasVLX.
6656multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6657                                    X86SchedWriteWidths sched,
6658                                    AVX512VLVectorVTInfo _,
6659                                    AVX512VLVectorVTInfo Ctrl> {
6660  let Predicates = [HasAVX512] in {
6661    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6662                                  _.info512, Ctrl.info512>, EVEX_V512;
6663  }
6664  let Predicates = [HasAVX512, HasVLX] in {
6665    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6666                                  _.info128, Ctrl.info128>, EVEX_V128;
6667    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6668                                  _.info256, Ctrl.info256>, EVEX_V256;
6669  }
6670}
6671
// Defines both families of a VPERMIL instruction under the same NAME prefix:
//   - the vector-control forms (opcode OpcVar) via avx512_permil_vec_common,
//     scheduled with SchedWriteFVarShuffle;
//   - the X86VPermilpi forms (opcode OpcImm) via avx512_shift_rmi_sizes,
//     scheduled with SchedWriteFShuffle, with EVEX, AVX512AIi8Base and
//     EVEX_CD8<_.info128.EltSize, CD8VF>.
// Note the parameter order: OpcImm comes before OpcVar.
6672multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6673                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6674  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6675                                      _, Ctrl>;
6676  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6677                                    X86VPermilpi, SchedWriteFShuffle, _>,
6678                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6679}
6680
// VPERMILPS / VPERMILPD: the first opcode argument is OpcImm and the second is
// OpcVar (see avx512_permil). The control type info is the integer vector
// info with the same element width as the FP data type info. VPERMILPD is
// additionally marked VEX_W1X.
6681let ExeDomain = SSEPackedSingle in
6682defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6683                               avx512vl_i32_info>;
6684let ExeDomain = SSEPackedDouble in
6685defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6686                               avx512vl_i64_info>, VEX_W1X;
6687
6688//===----------------------------------------------------------------------===//
6689// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6690//===----------------------------------------------------------------------===//
6691
// All three shuffles use opcode 0x70 with MRMSrcReg/MRMSrcMem formats and
// SchedWriteShuffle; they differ in the prefix base class (AVX512BIi8Base,
// AVX512XSIi8Base, AVX512XDIi8Base) and the matched node.
// VPSHUFD reuses avx512_shift_rmi_sizes (dword elements, with EVEX_CD8<32,
// CD8VF>); VPSHUFH / VPSHUFL reuse avx512_shift_rmi_w (word elements).
6692defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6693                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6694                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6695defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6696                                  X86PShufhw, SchedWriteShuffle>,
6697                                  EVEX, AVX512XSIi8Base;
6698defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6699                                  X86PShuflw, SchedWriteShuffle>,
6700                                  EVEX, AVX512XDIi8Base;
6701
6702//===----------------------------------------------------------------------===//
6703// AVX-512 - VPSHUFB
6704//===----------------------------------------------------------------------===//
6705
// Byte-element instantiations of avx512_var_shift (rr and rm only; no
// broadcast form): Z (v64i8_info), Z256 (v32i8x_info) and Z128 (v16i8x_info).
// Z requires HasBWI; Z256 and Z128 require HasVLX and HasBWI.
6706multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6707                               X86SchedWriteWidths sched> {
6708  let Predicates = [HasBWI] in
6709  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6710                              EVEX_V512;
6711
6712  let Predicates = [HasVLX, HasBWI] in {
6713  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6714                              EVEX_V256;
6715  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6716                              EVEX_V128;
6717  }
6718}
6719
// VPSHUFB: opcode 0x00, X86pshufb node, SchedWriteVarShuffle, marked VEX_WIG.
6720defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6721                                  SchedWriteVarShuffle>, VEX_WIG;
6722
6723//===----------------------------------------------------------------------===//
6724// Move Low to High and High to Low packed FP Instructions
6725//===----------------------------------------------------------------------===//
6726
// Register-register VMOVLHPS (opcode 0x16) matching X86Movlhps on v4f32.
// Both sources and the destination are VR128X.
6727def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6728          (ins VR128X:$src1, VR128X:$src2),
6729          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6730          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6731          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// Register-register VMOVHLPS (opcode 0x12) matching X86Movhlps on v4f32.
// It is flagged isCommutable and NotMemoryFoldable; VMOVLHPSZrr above carries
// neither flag.
6732let isCommutable = 1 in
6733def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6734          (ins VR128X:$src1, VR128X:$src2),
6735          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6736          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6737          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6738
6739//===----------------------------------------------------------------------===//
6740// VMOVHPS/PD VMOVLPS Instructions
6741// All patterns was taken from SSS implementation.
6742//===----------------------------------------------------------------------===//
6743
// Defines the load form (rm) of a VMOVH*/VMOVL* instruction.
//   opc/OpcodeStr - opcode and mnemonic.
//   OpNode        - pattern operator combining $src1 with the loaded value;
//                   callers may pass null_frag.
//   _             - type info for the register operands and result.
// The memory operand is always f64mem. In the pattern it is loaded with
// loadf64, wrapped in scalar_to_vector as v2f64, and bitconverted to _.VT.
// The instruction is marked mayLoad = 1 and hasSideEffects = 0 explicitly.
6744multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6745                                  SDPatternOperator OpNode,
6746                                  X86VectorVTInfo _> {
6747  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6748  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6749                  (ins _.RC:$src1, f64mem:$src2),
6750                  !strconcat(OpcodeStr,
6751                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6752                  [(set _.RC:$dst,
6753                     (OpNode _.RC:$src1,
6754                       (_.VT (bitconvert
6755                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6756                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6757}
6758
6759// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6760// SSE1. And MOVLPS pattern is even more complex.
// Accordingly the PS variants pass null_frag as OpNode, while the PD variants
// match X86Unpckl (VMOVHPD) and X86Movsd (VMOVLPD). High forms use opcode
// 0x16, low forms 0x12. PS variants use EVEX_CD8<32, CD8VT2>; PD variants use
// EVEX_CD8<64, CD8VT1> and add VEX_W.
6761defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6762                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6763defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6764                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6765defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6766                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6767defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6768                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6769
// Extra selection patterns that fold an X86vzload64 second operand into the
// load forms of VMOVHPD / VMOVLPD.
6770let Predicates = [HasAVX512] in {
6771  // VMOVHPD patterns
6772  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6773            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6774
6775  // VMOVLPD patterns
6776  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6777            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6778}
6779
// Store forms of VMOVHPS/VMOVHPD (opcode 0x17) and VMOVLPS/VMOVLPD (opcode
// 0x13). All take an f64mem destination and a VR128X source and are scheduled
// as WriteFStore.
// The PS variants have empty pattern lists, so each is preceded by its own
// brace-less `let mayStore = 1, hasSideEffects = 0 in`, which applies only to
// the single def that follows it. The PD variants have store patterns instead.
6780let SchedRW = [WriteFStore] in {
6781let mayStore = 1, hasSideEffects = 0 in
6782def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6783                       (ins f64mem:$dst, VR128X:$src),
6784                       "vmovhps\t{$src, $dst|$dst, $src}",
6785                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Pattern: store element 0 of X86Unpckh($src, $src) as an f64.
6786def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6787                       (ins f64mem:$dst, VR128X:$src),
6788                       "vmovhpd\t{$src, $dst|$dst, $src}",
6789                       [(store (f64 (extractelt
6790                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6791                                     (iPTR 0))), addr:$dst)]>,
6792                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6793let mayStore = 1, hasSideEffects = 0 in
6794def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6795                       (ins f64mem:$dst, VR128X:$src),
6796                       "vmovlps\t{$src, $dst|$dst, $src}",
6797                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Pattern: store element 0 of $src (as v2f64) as an f64.
6798def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6799                       (ins f64mem:$dst, VR128X:$src),
6800                       "vmovlpd\t{$src, $dst|$dst, $src}",
6801                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6802                                     (iPTR 0))), addr:$dst)]>,
6803                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6804} // SchedRW
6805
// Additional way to select VMOVHPDZ128mr: a store of element 0 of
// X86VPermilpi($src, 1) on v2f64.
6806let Predicates = [HasAVX512] in {
6807  // VMOVHPD patterns
6808  def : Pat<(store (f64 (extractelt
6809                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6810                           (iPTR 0))), addr:$dst),
6811           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6812}
6813//===----------------------------------------------------------------------===//
6814// FMA - Fused Multiply Operations
6815//
6816
// Defines the register (r), memory (m) and broadcast-memory (mb) forms of a
// packed "213" FMA instruction via AVX512_maskable_fma.
//   OpNode     - operator used in the first pattern argument.
//   MaskOpNode - operator used in the second pattern argument.
//   sched      - scheduling info; the memory forms use sched.Folded and
//                sched.ReadAfterFold.
//   _          - vector type info.
// $src1 does not appear in the `ins` lists written here but is tied to $dst
// through the constraint "$src1 = $dst". Every pattern passes the operands to
// the node in the order $src2, $src1, $src3. All forms are marked as using
// MXCSR and mayRaiseFPException = 1.
// NOTE(review): the trailing "1, 1" / "1, 0" arguments differ between the
// register and memory forms; their meaning is defined by AVX512_maskable_fma,
// which is outside this chunk.
6817multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6818                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6819                               X86VectorVTInfo _> {
6820  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6821      Uses = [MXCSR], mayRaiseFPException = 1 in {
6822  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6823          (ins _.RC:$src2, _.RC:$src3),
6824          OpcodeStr, "$src3, $src2", "$src2, $src3",
6825          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6826          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6827          EVEX_4V, Sched<[sched]>;
6828
  // $src3 is a full-vector memory operand loaded with _.LdFrag.
6829  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6830          (ins _.RC:$src2, _.MemOp:$src3),
6831          OpcodeStr, "$src3, $src2", "$src2, $src3",
6832          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6833          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6834          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6835
  // $src3 is a scalar memory operand loaded with _.BroadcastLdFrag; the
  // assembly string appends _.BroadcastStr and the encoding adds EVEX_B.
6836  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6837            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6838            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6839            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6840            (OpNode _.RC:$src2,
6841             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6842            (MaskOpNode _.RC:$src2,
6843             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6844            EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6845  }
6846}
6847
// Packed FMA, "213" operand form, register variant with an explicit rounding
// operand (rb). Adds AVX512RC:$rc to the inputs and sets EVEX_B and EVEX_RC.
// The same OpNode is passed for both pattern arguments, applied to
// ($src2, $src1, $src3, timm:$rc). $src1 is tied to $dst.
// Unlike the _rm multiclass, this `let` sets Uses = [MXCSR] but does not set
// mayRaiseFPException.
// NOTE(review): presumably because the embedded-rounding encoding suppresses
// FP exceptions - confirm against the ISA reference.
6848multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6849                                 X86FoldableSchedWrite sched,
6850                                 X86VectorVTInfo _> {
6851  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6852      Uses = [MXCSR] in
6853  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6854          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6855          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6856          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6857          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6858          EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6859}
6860
// Instantiates the "213" packed FMA multiclasses for all three vector widths
// of the given AVX512VLVectorVTInfo.
//   Z    - 512-bit (info512, sched.ZMM), guarded by `prd`; the only width that
//          also gets the rounding-mode variant (avx512_fma3_213_round).
//   Z256 - 256-bit (info256, sched.YMM), guarded by HasVLX and `prd`.
//   Z128 - 128-bit (info128, sched.XMM), guarded by HasVLX and `prd`.
// `prd` defaults to HasAVX512. EVEX_CD8 uses each width's element size with
// CD8VF.
6861multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6862                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6863                                   X86SchedWriteWidths sched,
6864                                   AVX512VLVectorVTInfo _,
6865                                   Predicate prd = HasAVX512> {
6866  let Predicates = [prd] in {
6867    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6868                                      sched.ZMM, _.info512>,
6869                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6870                                        _.info512>,
6871                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6872  }
6873  let Predicates = [HasVLX, prd] in {
6874    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6875                                    sched.YMM, _.info256>,
6876                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6877    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6878                                    sched.XMM, _.info128>,
6879                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6880  }
6881}
6882
// Instantiates the "213" packed FMA family for each floating-point element
// type, all scheduled with SchedWriteFMA:
//   PH - avx512vl_f16_info, mnemonic suffix "ph", predicate HasFP16, T_MAP6PD.
//   PS - avx512vl_f32_info, mnemonic suffix "ps", default predicate, T8PD.
//   PD - avx512vl_f64_info, mnemonic suffix "pd", default predicate, T8PD, VEX_W.
6883multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6884                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6885    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6886                                      OpNodeRnd, SchedWriteFMA,
6887                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6888    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6889                                      OpNodeRnd, SchedWriteFMA,
6890                                      avx512vl_f32_info>, T8PD;
6891    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6892                                      OpNodeRnd, SchedWriteFMA,
6893                                      avx512vl_f64_info>, T8PD, VEX_W;
6894}
6895
// Packed "213" FMA instruction families. Arguments per defm, in order:
// opcode byte, mnemonic stem, OpNode, MaskOpNode, rounding-mode node.
6896defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6897                                       fma, X86FmaddRnd>;
6898defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6899                                       X86Fmsub, X86FmsubRnd>;
6900defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6901                                       X86Fmaddsub, X86FmaddsubRnd>;
6902defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6903                                       X86Fmsubadd, X86FmsubaddRnd>;
6904defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6905                                       X86Fnmadd, X86FnmaddRnd>;
6906defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6907                                       X86Fnmsub, X86FnmsubRnd>;
6908
6909
// Packed FMA, "231" operand form. Defines register (r), full-vector load (m)
// and broadcast load (mb) variants through AVX512_maskable_fma.
// Parameters have the same roles as in avx512_fma3p_213_rm.
// $src1 is tied to $dst. Patterns apply the operator to
// ($src2, $src3, $src1); $src3 is a register, a load, or a broadcast load.
6910multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6911                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6912                               X86VectorVTInfo _> {
6913  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6914      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form: the first pattern argument is null_frag, so only the
  // MaskOpNode pattern is supplied here.
6915  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6916          (ins _.RC:$src2, _.RC:$src3),
6917          OpcodeStr, "$src3, $src2", "$src2, $src3",
6918          (null_frag),
6919          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6920          EVEX_4V, Sched<[sched]>;
6921
  // Full-vector memory form: $src3 is loaded through _.LdFrag.
6922  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6923          (ins _.RC:$src2, _.MemOp:$src3),
6924          OpcodeStr, "$src3, $src2", "$src2, $src3",
6925          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6926          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6927          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6928
  // Broadcast memory form: $src3 is loaded through _.BroadcastLdFrag and
  // EVEX_B is set.
6929  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6930         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6931         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6932         "$src2, ${src3}"#_.BroadcastStr,
6933         (_.VT (OpNode _.RC:$src2,
6934                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6935                      _.RC:$src1)),
6936         (_.VT (MaskOpNode _.RC:$src2,
6937                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6938                           _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6939         Sched<[sched.Folded, sched.ReadAfterFold]>;
6940  }
6941}
6942
// Packed FMA, "231" operand form, register variant with an explicit rounding
// operand (rb). Adds AVX512RC:$rc to the inputs and sets EVEX_B and EVEX_RC.
// The first pattern argument is null_frag; the second applies OpNode to
// ($src2, $src3, $src1, timm:$rc). $src1 is tied to $dst.
// This `let` sets Uses = [MXCSR] but does not set mayRaiseFPException.
6943multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6944                                 X86FoldableSchedWrite sched,
6945                                 X86VectorVTInfo _> {
6946  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6947      Uses = [MXCSR] in
6948  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6949          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6950          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6951          (null_frag),
6952          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6953          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6954}
6955
// Instantiates the "231" packed FMA multiclasses for all three vector widths
// of the given AVX512VLVectorVTInfo.
//   Z    - 512-bit (info512, sched.ZMM), guarded by `prd`; the only width that
//          also gets the rounding-mode variant (avx512_fma3_231_round).
//   Z256 - 256-bit (info256, sched.YMM), guarded by HasVLX and `prd`.
//   Z128 - 128-bit (info128, sched.XMM), guarded by HasVLX and `prd`.
// `prd` defaults to HasAVX512.
6956multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6957                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6958                                   X86SchedWriteWidths sched,
6959                                   AVX512VLVectorVTInfo _,
6960                                   Predicate prd = HasAVX512> {
6961  let Predicates = [prd] in {
6962    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6963                                      sched.ZMM, _.info512>,
6964                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6965                                        _.info512>,
6966                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6967  }
6968  let Predicates = [HasVLX, prd] in {
6969    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6970                                    sched.YMM, _.info256>,
6971                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6972    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6973                                    sched.XMM, _.info128>,
6974                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6975  }
6976}
6977
// Instantiates the "231" packed FMA family for each floating-point element
// type, all scheduled with SchedWriteFMA:
//   PH - avx512vl_f16_info, mnemonic suffix "ph", predicate HasFP16, T_MAP6PD.
//   PS - avx512vl_f32_info, mnemonic suffix "ps", default predicate, T8PD.
//   PD - avx512vl_f64_info, mnemonic suffix "pd", default predicate, T8PD, VEX_W.
6978multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6979                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6980    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6981                                      OpNodeRnd, SchedWriteFMA,
6982                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6983    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6984                                      OpNodeRnd, SchedWriteFMA,
6985                                      avx512vl_f32_info>, T8PD;
6986    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6987                                      OpNodeRnd, SchedWriteFMA,
6988                                      avx512vl_f64_info>, T8PD, VEX_W;
6989}
6990
// Packed "231" FMA instruction families. Arguments per defm, in order:
// opcode byte, mnemonic stem, OpNode, MaskOpNode, rounding-mode node.
6991defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6992                                       fma, X86FmaddRnd>;
6993defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6994                                       X86Fmsub, X86FmsubRnd>;
6995defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6996                                       X86Fmaddsub, X86FmaddsubRnd>;
6997defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6998                                       X86Fmsubadd, X86FmsubaddRnd>;
6999defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
7000                                       X86Fnmadd, X86FnmaddRnd>;
7001defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
7002                                       X86Fnmsub, X86FnmsubRnd>;
7003
// Packed FMA, "132" operand form. Defines register (r), full-vector load (m)
// and broadcast load (mb) variants through AVX512_maskable_fma.
// Parameters have the same roles as in avx512_fma3p_213_rm.
// $src1 is tied to $dst. The register form applies MaskOpNode to
// ($src1, $src3, $src2); the memory forms use ($src3, $src1, $src2) with
// $src3 loaded from memory.
7004multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7005                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
7006                               X86VectorVTInfo _> {
7007  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
7008      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form: the first pattern argument is null_frag, so only the
  // MaskOpNode pattern is supplied here.
7009  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7010          (ins _.RC:$src2, _.RC:$src3),
7011          OpcodeStr, "$src3, $src2", "$src2, $src3",
7012          (null_frag),
7013          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
7014          EVEX_4V, Sched<[sched]>;
7015
7016  // The pattern uses 312 order so that the load sits in a different operand
7017  // position than in the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7018  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7019          (ins _.RC:$src2, _.MemOp:$src3),
7020          OpcodeStr, "$src3, $src2", "$src2, $src3",
7021          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
7022          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
7023          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7024
7025  // The pattern uses 312 order so that the load sits in a different operand
7026  // position than in the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7027  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7028         (ins _.RC:$src2, _.ScalarMemOp:$src3),
7029         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
7030         "$src2, ${src3}"#_.BroadcastStr,
7031         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7032                       _.RC:$src1, _.RC:$src2)),
7033         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7034                           _.RC:$src1, _.RC:$src2)), 1, 0>,
7035         EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7036  }
7037}
7038
// Packed FMA, "132" operand form, register variant with an explicit rounding
// operand (rb). Adds AVX512RC:$rc to the inputs and sets EVEX_B and EVEX_RC.
// The first pattern argument is null_frag; the second applies OpNode to
// ($src1, $src3, $src2, timm:$rc). $src1 is tied to $dst.
// This `let` sets Uses = [MXCSR] but does not set mayRaiseFPException.
7039multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
7040                                 X86FoldableSchedWrite sched,
7041                                 X86VectorVTInfo _> {
7042  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
7043      Uses = [MXCSR] in
7044  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7045          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7046          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
7047          (null_frag),
7048          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
7049          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
7050}
7051
// Instantiates the "132" packed FMA multiclasses for all three vector widths
// of the given AVX512VLVectorVTInfo.
//   Z    - 512-bit (info512, sched.ZMM), guarded by `prd`; the only width that
//          also gets the rounding-mode variant (avx512_fma3_132_round).
//   Z256 - 256-bit (info256, sched.YMM), guarded by HasVLX and `prd`.
//   Z128 - 128-bit (info128, sched.XMM), guarded by HasVLX and `prd`.
// `prd` defaults to HasAVX512.
7052multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7053                                   SDNode MaskOpNode, SDNode OpNodeRnd,
7054                                   X86SchedWriteWidths sched,
7055                                   AVX512VLVectorVTInfo _,
7056                                   Predicate prd = HasAVX512> {
7057  let Predicates = [prd] in {
7058    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7059                                      sched.ZMM, _.info512>,
7060                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7061                                        _.info512>,
7062                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7063  }
7064  let Predicates = [HasVLX, prd] in {
7065    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7066                                    sched.YMM, _.info256>,
7067                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7068    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7069                                    sched.XMM, _.info128>,
7070                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7071  }
7072}
7073
// Instantiates the "132" packed FMA family for each floating-point element
// type, all scheduled with SchedWriteFMA:
//   PH - avx512vl_f16_info, mnemonic suffix "ph", predicate HasFP16, T_MAP6PD.
//   PS - avx512vl_f32_info, mnemonic suffix "ps", default predicate, T8PD.
//   PD - avx512vl_f64_info, mnemonic suffix "pd", default predicate, T8PD, VEX_W.
7074multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7075                              SDNode MaskOpNode, SDNode OpNodeRnd > {
7076    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
7077                                      OpNodeRnd, SchedWriteFMA,
7078                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
7079    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7080                                      OpNodeRnd, SchedWriteFMA,
7081                                      avx512vl_f32_info>, T8PD;
7082    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7083                                      OpNodeRnd, SchedWriteFMA,
7084                                      avx512vl_f64_info>, T8PD, VEX_W;
7085}
7086
// Packed "132" FMA instruction families. Arguments per defm, in order:
// opcode byte, mnemonic stem, OpNode, MaskOpNode, rounding-mode node.
7087defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7088                                       fma, X86FmaddRnd>;
7089defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7090                                       X86Fmsub, X86FmsubRnd>;
7091defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7092                                       X86Fmaddsub, X86FmaddsubRnd>;
7093defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7094                                       X86Fmsubadd, X86FmsubaddRnd>;
7095defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7096                                       X86Fnmadd, X86FnmaddRnd>;
7097defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7098                                       X86Fnmsub, X86FnmsubRnd>;
7099
7100// Scalar FMA
// Defines the six records that make up one scalar FMA form:
//   r_Int, m_Int, rb_Int - maskable forms on the vector register class _.RC,
//                          built through AVX512_maskable_3src_scalar with
//                          null_frag (no pattern attached here).
//   r, m, rb             - isCodeGenOnly, isCommutable forms on the scalar
//                          register class _.FRC, carrying RHS_r, RHS_m and
//                          RHS_b as their patterns.
// MaskOnlyReg: when set, the r and rb defs get an empty pattern list instead
// of RHS_r / RHS_b; the m def always uses RHS_m.
// $src1 is tied to $dst for every record.
7101multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7102                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7103let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7104  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7105          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7106          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7107          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7108
  // No pattern is attached, so mayLoad is stated explicitly.
7109  let mayLoad = 1 in
7110  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7111          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7112          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7113          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7114
  // Rounding-mode form: uses MXCSR but, unlike r_Int / m_Int, carries no
  // SIMD_EXC.
7115  let Uses = [MXCSR] in
7116  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7117         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7118         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7119         EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7120
7121  let isCodeGenOnly = 1, isCommutable = 1 in {
7122    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7123                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7124                     !strconcat(OpcodeStr,
7125                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7126                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
7127    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
7128                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7129                    !strconcat(OpcodeStr,
7130                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7131                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7132
7133    let Uses = [MXCSR] in
7134    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7135                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7136                     !strconcat(OpcodeStr,
7137                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7138                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7139                     Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
7140  }// isCodeGenOnly = 1
7141}// Constraints = "$src1 = $dst"
7142}
7143
// Instantiates avx512_fma3s_common three times, once per operand form, naming
// the results NAME#213#SUFF#Z, NAME#231#SUFF#Z and NAME#132#SUFF#Z.
//   opc213/opc231/opc132 - opcode byte for each form.
//   OpcodeStr            - mnemonic stem; the form number and _.Suffix are
//                          appended.
//   OpNode / OpNodeRnd   - operators for the plain and rounding-mode patterns.
// Operator operand order in the register patterns:
//   213: ($src2, $src1, $src3)
//   231: ($src2, $src3, $src1)
//   132: ($src1, $src3, $src2)
// The 213 form passes MaskOnlyReg = 0; the 231 and 132 forms pass 1, which
// gives their r and rb defs an empty pattern list in avx512_fma3s_common.
7144multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7145                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7146                            X86VectorVTInfo _, string SUFF> {
7147  let ExeDomain = _.ExeDomain in {
7148  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7149                // Operands for intrinsic are in 123 order to preserve passthru
7150                // semantics.
7151                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7152                         _.FRC:$src3))),
7153                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7154                         (_.ScalarLdFrag addr:$src3)))),
7155                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7156                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
7157
7158  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7159                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7160                                          _.FRC:$src1))),
7161                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7162                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7163                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7164                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
7165
7166  // One pattern is 312 order so that the load is in a different place from the
7167  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7168  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7169                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7170                         _.FRC:$src2))),
7171                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7172                                 _.FRC:$src1, _.FRC:$src2))),
7173                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7174                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
7175  }
7176}
7177
// Instantiates the scalar FMA forms for each element type:
//   SS - f32x_info, EVEX_CD8<32, CD8VT1>, T8PD, under HasAVX512.
//   SD - f64x_info, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD, under HasAVX512.
//   SH - f16x_info, EVEX_CD8<16, CD8VT1>, T_MAP6PD, under HasFP16.
// All three are VEX_LIG.
7178multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7179                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7180  let Predicates = [HasAVX512] in {
7181    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7182                                 OpNodeRnd, f32x_info, "SS">,
7183                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
7184    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7185                                 OpNodeRnd, f64x_info, "SD">,
7186                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
7187  }
7188  let Predicates = [HasFP16] in {
7189    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7190                                 OpNodeRnd, f16x_info, "SH">,
7191                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
7192  }
7193}
7194
// Scalar FMA instruction families. Arguments per defm, in order:
// 213 opcode, 231 opcode, 132 opcode, mnemonic stem, OpNode, rounding node.
7195defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7196defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7197defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7198defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
7199
// Selection patterns that map
//   (Move $src1, (scalar_to_vector (<op> ...)))
// onto the *_Int scalar FMA instructions, where one operand of <op> is
// element 0 of $src1.
//   Op       - operator for the unmasked patterns.
//   MaskedOp - operator used underneath X86selects_mask.
//   RndOp    - operator carrying an extra (i32 timm:$rc) rounding operand.
//   Prefix, Suffix - pieces of the instruction name, combined as
//                    Prefix#"213"#Suffix#"Zr_Int" and similar.
//   Move     - the node that merges the scalar result back into $src1.
//   _        - vector type info (VT, EltVT, FRC, ScalarLdFrag).
//   ZeroFP   - the zero leaf used as the select's false value in the
//              zero-masking (kz) patterns.
//   prd      - guarding predicate, HasAVX512 by default.
// The position of the element-0 extract among the operator's operands picks
// the instruction form: second operand -> 213, third -> 231, first -> 132
// (132 is only used for memory operands here). For 132 the load is always the
// second operand and $src2 the third.
7200multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7201                                      SDNode RndOp, string Prefix,
7202                                      string Suffix, SDNode Move,
7203                                      X86VectorVTInfo _, PatLeaf ZeroFP,
7204                                      Predicate prd = HasAVX512> {
7205  let Predicates = [prd] in {
    // Unmasked patterns.
7206    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7207                (Op _.FRC:$src2,
7208                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7209                    _.FRC:$src3))))),
7210              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7211               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7212               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7213
7214    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7215                (Op _.FRC:$src2, _.FRC:$src3,
7216                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7217              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7218               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7219               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7220
7221    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7222                (Op _.FRC:$src2,
7223                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7224                    (_.ScalarLdFrag addr:$src3)))))),
7225              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7226               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7227               addr:$src3)>;
7228
7229    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7230                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7231                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7232              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7233               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7234               addr:$src3)>;
7235
7236    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7237                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7238                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7239              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7240               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7241               addr:$src3)>;
7242
    // Merge-masked patterns: the select's false value is element 0 of $src1.
7243    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7244               (X86selects_mask VK1WM:$mask,
7245                (MaskedOp _.FRC:$src2,
7246                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7247                    _.FRC:$src3),
7248                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7249              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7250               VR128X:$src1, VK1WM:$mask,
7251               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7252               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7253
7254    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7255               (X86selects_mask VK1WM:$mask,
7256                (MaskedOp _.FRC:$src2,
7257                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7258                    (_.ScalarLdFrag addr:$src3)),
7259                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7260              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7261               VR128X:$src1, VK1WM:$mask,
7262               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7263
7264    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7265               (X86selects_mask VK1WM:$mask,
7266                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7267                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7268                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7269              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7270               VR128X:$src1, VK1WM:$mask,
7271               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7272
7273    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7274               (X86selects_mask VK1WM:$mask,
7275                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7276                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7277                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7278              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7279               VR128X:$src1, VK1WM:$mask,
7280               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7281               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7282
7283    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7284               (X86selects_mask VK1WM:$mask,
7285                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7286                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7287                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7288              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7289               VR128X:$src1, VK1WM:$mask,
7290               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7291
    // Zero-masked patterns: the select's false value is ZeroFP.
7292    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7293               (X86selects_mask VK1WM:$mask,
7294                (MaskedOp _.FRC:$src2,
7295                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7296                          _.FRC:$src3),
7297                (_.EltVT ZeroFP)))))),
7298              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7299               VR128X:$src1, VK1WM:$mask,
7300               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7301               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7302
7303    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7304               (X86selects_mask VK1WM:$mask,
7305                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7306                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7307                (_.EltVT ZeroFP)))))),
7308              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7309               VR128X:$src1, VK1WM:$mask,
7310               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7311               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7312
7313    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7314               (X86selects_mask VK1WM:$mask,
7315                (MaskedOp _.FRC:$src2,
7316                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7317                          (_.ScalarLdFrag addr:$src3)),
7318                (_.EltVT ZeroFP)))))),
7319              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7320               VR128X:$src1, VK1WM:$mask,
7321               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7322
    // 132 form: the load must be the second operand and $src2 the third, as
    // in the Zm_Int and Zm_Intk patterns. With the operands the other way
    // round this would match the 213 shape while emitting a 132 instruction.
7323    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7324               (X86selects_mask VK1WM:$mask,
7325                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7326                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7327                (_.EltVT ZeroFP)))))),
7328              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7329               VR128X:$src1, VK1WM:$mask,
7330               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7331
7332    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7333               (X86selects_mask VK1WM:$mask,
7334                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7335                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7336                (_.EltVT ZeroFP)))))),
7337              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7338               VR128X:$src1, VK1WM:$mask,
7339               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7340
7341    // Patterns with rounding mode.
7342    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7343                (RndOp _.FRC:$src2,
7344                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7345                       _.FRC:$src3, (i32 timm:$rc)))))),
7346              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7347               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7348               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7349
7350    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7351                (RndOp _.FRC:$src2, _.FRC:$src3,
7352                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7353                       (i32 timm:$rc)))))),
7354              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7355               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7356               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7357
7358    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7359               (X86selects_mask VK1WM:$mask,
7360                (RndOp _.FRC:$src2,
7361                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7362                       _.FRC:$src3, (i32 timm:$rc)),
7363                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7364              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7365               VR128X:$src1, VK1WM:$mask,
7366               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7367               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7368
7369    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7370               (X86selects_mask VK1WM:$mask,
7371                (RndOp _.FRC:$src2, _.FRC:$src3,
7372                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7373                       (i32 timm:$rc)),
7374                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7375              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7376               VR128X:$src1, VK1WM:$mask,
7377               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7378               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7379
7380    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7381               (X86selects_mask VK1WM:$mask,
7382                (RndOp _.FRC:$src2,
7383                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7384                       _.FRC:$src3, (i32 timm:$rc)),
7385                (_.EltVT ZeroFP)))))),
7386              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7387               VR128X:$src1, VK1WM:$mask,
7388               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7389               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7390
7391    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7392               (X86selects_mask VK1WM:$mask,
7393                (RndOp _.FRC:$src2, _.FRC:$src3,
7394                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7395                       (i32 timm:$rc)),
7396                (_.EltVT ZeroFP)))))),
7397              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7398               VR128X:$src1, VK1WM:$mask,
7399               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7400               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7401  }
7402}
// Instantiate the scalar FMA selection patterns once per operation
// (VFMADD / VFMSUB / VFNMADD / VFNMSUB) and per scalar element type.
// Each instantiation passes: the "any" node, the plain node, the
// rounding-mode node, the instruction name prefix, the type suffix, the
// matching X86Movs* node, the 128-bit vector type info and the FP zero
// immediate.
//
// Half precision ("SH"): explicitly passes HasFP16 as the last argument.
7403defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7404                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7405defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7406                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7407defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7408                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7409defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7410                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7411
// Single precision ("SS"): the trailing predicate argument is omitted, so the
// multiclass default applies (the default is declared outside this excerpt).
7412defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7413                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7414defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7415                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7416defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7417                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7418defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7419                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7420
// Double precision ("SD"): same shape as the SS instantiations.
7421defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7422                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7423defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7424                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7425defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7426                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7427defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7428                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7429
7430//===----------------------------------------------------------------------===//
7431// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7432//===----------------------------------------------------------------------===//
// Defines the register (r), full-vector memory (m) and broadcast memory (mb)
// forms of a 52-bit multiply-add instruction for a single vector width.
//
// The whole multiclass sits under the "$src1 = $dst" constraint, so $src1
// (the last operand of OpNode in every pattern below) is tied to the
// destination register.
//
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode matched as (OpNode $src2, $src3, $src1).
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info providing RC, VT, MemOp, LdFrag, etc.
7433let Constraints = "$src1 = $dst" in {
7434multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7435                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7436  // NOTE: The SDNodes have the multiply operands first with the add last.
7437  // This enables commuted load patterns to be autogenerated by tablegen.
7438  let ExeDomain = _.ExeDomain in {
  // Register-register form.
  // NOTE(review): the trailing `1, 1` are optional arguments of
  // AVX512_maskable_3src (declared elsewhere); presumably commutability
  // flags -- confirm against that multiclass.
7439  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7440          (ins _.RC:$src2, _.RC:$src3),
7441          OpcodeStr, "$src3, $src2", "$src2, $src3",
7442          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7443          T8PD, EVEX_4V, Sched<[sched]>;
7444
  // Register-memory form: $src3 is loaded through _.LdFrag.
7445  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7446          (ins _.RC:$src2, _.MemOp:$src3),
7447          OpcodeStr, "$src3, $src2", "$src2, $src3",
7448          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7449          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7450
  // Broadcast-memory form: $src3 is a scalar memory operand loaded through
  // _.BroadcastLdFrag; the asm strings append _.BroadcastStr to ${src3} and
  // the record is additionally tagged EVEX_B.
7451  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7452            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7453            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7454            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7455            (OpNode _.RC:$src2,
7456                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7457                    _.RC:$src1)>,
7458            T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7459  }
7460}
7461} // Constraints = "$src1 = $dst"
7462
// Instantiates avx512_pmadd52_rm for the 512-, 256- and 128-bit vector widths.
//   - Z    (512-bit) requires HasIFMA.
//   - Z256 / Z128 additionally require HasVLX.
// Each width picks the matching scheduling class (sched.ZMM/YMM/XMM), the
// matching EVEX_V* length tag, and an EVEX_CD8 built from that width's
// EltSize with CD8VF.
//
//   opc, OpcodeStr, OpNode - forwarded unchanged to avx512_pmadd52_rm.
//   sched                  - per-width scheduling classes.
//   _                      - per-width vector type info (info512/256/128).
7463multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7464                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7465  let Predicates = [HasIFMA] in {
7466    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7467                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7468  }
7469  let Predicates = [HasVLX, HasIFMA] in {
7470    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7471                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7472    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7473                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7474  }
7475}
7476
// The two IFMA instructions: opcode 0xb4 selects x86vpmadd52l ("vpmadd52luq")
// and opcode 0xb5 selects x86vpmadd52h ("vpmadd52huq"). Both operate on the
// i64 vector type family, share the SchedWriteVecIMul scheduling classes and
// are tagged VEX_W.
7477defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7478                                         SchedWriteVecIMul, avx512vl_i64_info>,
7479                                         VEX_W;
7480defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7481                                         SchedWriteVecIMul, avx512vl_i64_info>,
7482                                         VEX_W;
7483
7484//===----------------------------------------------------------------------===//
7485// AVX-512  Scalar convert from signed/unsigned integer to float/double
7486//===----------------------------------------------------------------------===//
7487
// Scalar integer -> floating-point conversion, non-rounding forms.
//
// Defines four instructions plus one assembler alias:
//   rr / rm         - codegen-only forms on the scalar FP register class
//                     (DstVT.FRC); they carry no pattern of their own.
//   rr_Int / rm_Int - forms on the vector register class (DstVT.RC) that
//                     match OpNode(src1, src2).
//
//   opc      - opcode byte.
//   OpNode   - node matched by the _Int forms (callers may pass null_frag).
//   sched    - scheduling class; memory forms use sched.Folded.
//   SrcRC    - integer source register class.
//   DstVT    - destination vector type info.
//   x86memop - memory operand type of the rm/rm_Int forms.
//   ld_frag  - load fragment used in the rm_Int pattern.
//   asm      - mnemonic used in the instruction asm strings.
//   mem      - size suffix: emitted as the optional "{mem}" group in the
//              memory-form asm strings and appended verbatim in the alias.
//   _Uses    - implicit register uses (defaults to [MXCSR]).
//   _mayRaiseFPException - value for mayRaiseFPException (defaults to 1).
7488multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7489                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7490                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7491                    string mem, list<Register> _Uses = [MXCSR],
7492                    bit _mayRaiseFPException = 1> {
7493let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7494    mayRaiseFPException = _mayRaiseFPException in {
  // Pattern-less codegen-only forms; hasSideEffects/mayLoad are set by hand
  // because there is no pattern to derive them from.
7495  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7496    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7497              (ins DstVT.FRC:$src1, SrcRC:$src),
7498              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7499              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7500    let mayLoad = 1 in
7501      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7502              (ins DstVT.FRC:$src1, x86memop:$src),
7503              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7504              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7505  } // hasSideEffects = 0
  // Vector-register forms matched through OpNode.
7506  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7507                (ins DstVT.RC:$src1, SrcRC:$src2),
7508                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7509                [(set DstVT.RC:$dst,
7510                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7511               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7512
7513  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7514                (ins DstVT.RC:$src1, x86memop:$src2),
7515                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7516                [(set DstVT.RC:$dst,
7517                      (OpNode (DstVT.VT DstVT.RC:$src1),
7518                               (ld_frag addr:$src2)))]>,
7519                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7520}
  // Alias mapping the explicitly suffixed register mnemonic ("v" # asm # mem)
  // onto rr_Int. The `0, "att"` arguments are the InstAlias emit flag and
  // assembler variant name.
7521  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7522                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7523                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7524}
7525
// Scalar integer -> floating-point conversion with an explicit rounding-mode
// operand.
//
// Defines rrb_Int, a register form that takes an extra AVX512RC:$rc operand
// and matches OpNode(src1, src2, (i32 timm:$rc)); it is tagged EVEX_B and
// EVEX_RC. An alias maps the suffixed mnemonic ("v" # asm # mem) onto it.
//
//   opc    - opcode byte.
//   OpNode - rounding-aware SDNode.
//   sched  - scheduling class.
//   SrcRC  - integer source register class.
//   DstVT  - destination vector type info.
//   asm    - mnemonic used in the instruction asm string.
//   mem    - size suffix appended in the alias mnemonic.
7526multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7527                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7528                               X86VectorVTInfo DstVT, string asm,
7529                               string mem> {
  // The braceless `let` applies to the single following def (rrb_Int) only.
7530  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7531  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7532              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7533              !strconcat(asm,
7534                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7535              [(set DstVT.RC:$dst,
7536                    (OpNode (DstVT.VT DstVT.RC:$src1),
7537                             SrcRC:$src2,
7538                             (i32 timm:$rc)))]>,
7539              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7540  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7541                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7542                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7543}
7544
// Convenience wrapper that defines, under the caller's NAME, both the
// rounding-mode form (avx512_vcvtsi_round, using OpNodeRnd) and the regular
// forms (avx512_vcvtsi, using OpNode), tagged VEX_LIG.
// avx512_vcvtsi is instantiated with its default _Uses and
// _mayRaiseFPException arguments.
//
//   opc, sched, SrcRC, DstVT, asm, mem - forwarded to both multiclasses.
//   x86memop, ld_frag                  - forwarded to avx512_vcvtsi only.
7545multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7546                                X86FoldableSchedWrite sched,
7547                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7548                                X86MemOperand x86memop, PatFrag ld_frag,
7549                                string asm, string mem> {
7550  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7551              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7552                            ld_frag, asm, mem>, VEX_LIG;
7553}
7554
// Signed (opcode 0x2A) and unsigned (opcode 0x7B) scalar integer -> FP
// conversion instructions, their suffix-less memory aliases, and the
// selection patterns for the generic int-to-fp nodes. Everything in this
// block requires HasAVX512.
7555let Predicates = [HasAVX512] in {
// Signed conversions. The "64" variants take a GR64 / i64mem source and add
// VEX_W.
7556defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7557                                 WriteCvtI2SS, GR32,
7558                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7559                                 XS, EVEX_CD8<32, CD8VT1>;
7560defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7561                                 WriteCvtI2SS, GR64,
7562                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7563                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// The 32-bit -> f64 form uses avx512_vcvtsi directly: no rounding-mode form,
// no _Int pattern (null_frag), no implicit uses and mayRaiseFPException = 0.
// NOTE(review): presumably because i32 -> f64 is always exact -- confirm.
7564defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7565                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7566                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7567defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7568                                 WriteCvtI2SD, GR64,
7569                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7570                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7571
// Suffix-less mnemonics with a memory operand resolve to the 32-bit memory
// forms.
7572def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7573              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7574def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7575              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7576
// Select any_sint_to_fp of a loaded integer to the codegen-only rm forms.
// The first (pass-through) source operand is given as IMPLICIT_DEF.
7577def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7578          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7579def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7580          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7581def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7582          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7583def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7584          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7585
// Same for a general-purpose register source, using the rr forms.
7586def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7587          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7588def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7589          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7590def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7591          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7592def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7593          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7594
// Unsigned conversions: mirror of the signed definitions above with opcode
// 0x7B and the X86UintToFp / X86UintToFpRnd nodes.
7595defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7596                                  WriteCvtI2SS, GR32,
7597                                  v4f32x_info, i32mem, loadi32,
7598                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7599defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7600                                  WriteCvtI2SS, GR64,
7601                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7602                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// As with VCVTSI2SDZ: no rounding-mode form, null_frag pattern, no implicit
// uses, mayRaiseFPException = 0.
7603defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7604                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7605                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7606defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7607                                  WriteCvtI2SD, GR64,
7608                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7609                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7610
// Suffix-less unsigned mnemonics with a memory operand resolve to the 32-bit
// memory forms.
7611def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7612              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7613def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7614              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7615
// Select any_uint_to_fp of a loaded integer to the codegen-only rm forms.
7616def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7617          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7618def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7619          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7620def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7621          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7622def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7623          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7624
// Same for a general-purpose register source, using the rr forms.
7625def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7626          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7627def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7628          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7629def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7630          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7631def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7632          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7633}
7634
7635//===----------------------------------------------------------------------===//
7636// AVX-512  Scalar convert from float/double to integer
7637//===----------------------------------------------------------------------===//
7638
// Scalar floating-point -> integer conversion on the vector register class,
// with an optional rounding-mode form.
//
// Defines, under Predicates = [prd]:
//   rr_Int  - register form matching OpNode(src).
//   rrb_Int - register form with an AVX512RC:$rc operand matching
//             OpNodeRnd(src, (i32 timm:$rc)); tagged EVEX_B and EVEX_RC.
//   rm_Int  - memory form matching OpNode on SrcVT.ScalarIntMemFrags.
// plus three aliases that map "v" # asm # aliasStr onto those records.
//
//   opc       - opcode byte.
//   SrcVT     - source (FP) type info.
//   DstVT     - destination (integer) type info.
//   OpNode    - SDNode for the non-rounding forms.
//   OpNodeRnd - SDNode for the rounding-mode form.
//   sched     - scheduling class; the memory form uses sched.Folded.
//   asm       - mnemonic used in the instruction asm strings.
//   aliasStr  - suffix appended to the mnemonic in the aliases.
//   prd       - predicate guarding the instructions (defaults to HasAVX512).
7639multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7640                                  X86VectorVTInfo DstVT, SDNode OpNode,
7641                                  SDNode OpNodeRnd,
7642                                  X86FoldableSchedWrite sched, string asm,
7643                                  string aliasStr, Predicate prd = HasAVX512> {
7644  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7645    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7646                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7647                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7648                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // The rounding form lists MXCSR in Uses explicitly and, unlike the other
    // two forms, does not carry SIMD_EXC.
7649    let Uses = [MXCSR] in
7650    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7651                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7652                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7653                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7654                 Sched<[sched]>;
7655    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7656                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7657                [(set DstVT.RC:$dst, (OpNode
7658                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7659                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7660  } // Predicates = [prd]
7661
  // Aliases are defined outside the predicate scope.
7662  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7663          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7664  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7665          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7666  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7667          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7668                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7669}
7670
7671// Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si / X86cvts2siRnd;
// unsigned conversions use opcode 0x79 with X86cvts2usi / X86cvts2usiRnd.
// 64-bit destinations (i64x_info) add VEX_W and use the "{q}" alias suffix;
// 32-bit destinations use "{l}". SS sources use the XS prefix with
// EVEX_CD8<32, CD8VT1>; SD sources use XD with EVEX_CD8<64, CD8VT1>.
7672defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7673                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7674                                   XS, EVEX_CD8<32, CD8VT1>;
7675defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7676                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7677                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7678defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7679                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7680                                   XS, EVEX_CD8<32, CD8VT1>;
7681defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7682                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7683                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7684defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7685                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7686                                   XD, EVEX_CD8<64, CD8VT1>;
7687defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7688                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7689                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7690defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7691                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7692                                   XD, EVEX_CD8<64, CD8VT1>;
7693defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7694                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7695                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7696
// Codegen-only scalar FP -> integer conversion forms that work on the scalar
// FP register class (SrcVT.FRC) instead of the vector register class.
//
// Defines, under HasAVX512:
//   rr - register form matching OpNode(FRC src).
//   rm - memory form matching OpNode(SrcVT.ScalarLdFrag addr).
//
//   opc    - opcode byte.
//   asm    - full mnemonic (used verbatim in the asm string).
//   SrcVT  - source (FP) type info.
//   DstVT  - destination (integer) type info.
//   OpNode - SDNode matched by both forms.
//   sched  - scheduling class; the memory form uses sched.Folded.
7697multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7698                        X86VectorVTInfo DstVT, SDNode OpNode,
7699                        X86FoldableSchedWrite sched> {
7700  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7701    let isCodeGenOnly = 1 in {
7702    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7703                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7704                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7705                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7706    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7707                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7708                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7709                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7710    }
7711  } // Predicates = [HasAVX512]
7712}
7713
// Codegen-only rr/rm forms selecting lrint (32-bit destination) and llrint
// (64-bit destination, VEX_W) for f32 and f64 sources. All four use opcode
// 0x2D.
7714defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7715                       lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
7716defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7717                       llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7718defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7719                       lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
7720defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7721                       llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7722
// Select lrint with an i64 result to the 64-bit conversion instructions, for
// both register and loaded f32/f64 operands.
7723let Predicates = [HasAVX512] in {
7724  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7725  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7726
7727  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7728  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7729}
7730
7731// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7732// which produce unnecessary vmovs{s,d} instructions
//
// Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of
// an integer -> FP conversion and selects the corresponding *_Int
// instruction, passing $dst as its first source operand. Register and
// loaded (i32/i64) integer sources are covered for both signed and unsigned
// conversions.
7733let Predicates = [HasAVX512] in {
// Signed -> f32.
7734def : Pat<(v4f32 (X86Movss
7735                   (v4f32 VR128X:$dst),
7736                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7737          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7738
7739def : Pat<(v4f32 (X86Movss
7740                   (v4f32 VR128X:$dst),
7741                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7742          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7743
7744def : Pat<(v4f32 (X86Movss
7745                   (v4f32 VR128X:$dst),
7746                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7747          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7748
7749def : Pat<(v4f32 (X86Movss
7750                   (v4f32 VR128X:$dst),
7751                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7752          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7753
// Signed -> f64.
7754def : Pat<(v2f64 (X86Movsd
7755                   (v2f64 VR128X:$dst),
7756                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7757          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7758
7759def : Pat<(v2f64 (X86Movsd
7760                   (v2f64 VR128X:$dst),
7761                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7762          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7763
7764def : Pat<(v2f64 (X86Movsd
7765                   (v2f64 VR128X:$dst),
7766                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7767          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7768
7769def : Pat<(v2f64 (X86Movsd
7770                   (v2f64 VR128X:$dst),
7771                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7772          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7773
// Unsigned -> f32.
7774def : Pat<(v4f32 (X86Movss
7775                   (v4f32 VR128X:$dst),
7776                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7777          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7778
7779def : Pat<(v4f32 (X86Movss
7780                   (v4f32 VR128X:$dst),
7781                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7782          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7783
7784def : Pat<(v4f32 (X86Movss
7785                   (v4f32 VR128X:$dst),
7786                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7787          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7788
7789def : Pat<(v4f32 (X86Movss
7790                   (v4f32 VR128X:$dst),
7791                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7792          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7793
// Unsigned -> f64.
7794def : Pat<(v2f64 (X86Movsd
7795                   (v2f64 VR128X:$dst),
7796                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7797          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7798
7799def : Pat<(v2f64 (X86Movsd
7800                   (v2f64 VR128X:$dst),
7801                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7802          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7803
7804def : Pat<(v2f64 (X86Movsd
7805                   (v2f64 VR128X:$dst),
7806                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7807          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7808
7809def : Pat<(v2f64 (X86Movsd
7810                   (v2f64 VR128X:$dst),
7811                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7812          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7813} // Predicates = [HasAVX512]
7814
7815// Convert float/double to signed/unsigned int 32/64 with truncation
//
// Defines, under Predicates = [prd]:
//   rr / rm  - codegen-only forms on the scalar FP register class
//              (_SrcRC.FRC) matching OpNode.
//   rr_Int   - vector-register form matching OpNodeInt.
//   rrb_Int  - vector-register form with "{sae}" in its asm string, matching
//              OpNodeSAE; tagged EVEX_B.
//   rm_Int   - memory form matching OpNodeInt on _SrcRC.ScalarIntMemFrags.
// plus three aliases mapping asm # aliasStr onto the _Int records.
//
//   opc       - opcode byte.
//   asm       - full mnemonic (used verbatim; the aliases do not add a "v").
//   _SrcRC    - source (FP) type info.
//   _DstRC    - destination (integer) type info.
//   OpNode    - node for the FRC-based codegen-only forms.
//   OpNodeInt - node for the rr_Int / rm_Int forms.
//   OpNodeSAE - node for the {sae} form.
//   sched     - scheduling class; memory forms use sched.Folded.
//   aliasStr  - suffix appended to the mnemonic in the aliases.
//   prd       - predicate guarding the instructions (defaults to HasAVX512).
7816multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7817                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7818                            SDNode OpNodeInt, SDNode OpNodeSAE,
7819                            X86FoldableSchedWrite sched, string aliasStr,
7820                            Predicate prd = HasAVX512> {
7821let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7822  let isCodeGenOnly = 1 in {
7823  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7824              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7825              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7826              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7827  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7828              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7829              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7830              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7831  }
7832
7833  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7834            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7835           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7836           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // The {sae} form lists MXCSR in Uses explicitly and, unlike the other
  // forms, does not carry SIMD_EXC.
7837  let Uses = [MXCSR] in
7838  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7839            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7840            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7841                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7842  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7843              (ins _SrcRC.IntScalarMemOp:$src),
7844              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7845              [(set _DstRC.RC:$dst,
7846                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7847              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7848} // Predicates = [prd]
7849
  // Aliases are defined outside the predicate scope.
7850  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7851          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7852  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7853          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7854  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7855          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7856                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7857}
7858
// Truncating FP -> signed integer conversions: opcode 0x2C, selecting
// any_fp_to_sint (FRC forms) and X86cvtts2Int / X86cvtts2IntSAE (_Int forms).
// 64-bit destinations add VEX_W and use the "{q}" alias suffix.
7859defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7860                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7861                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7862defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7863                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7864                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7865defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7866                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7867                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7868defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7869                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7870                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7871
// Truncating FP -> unsigned integer conversions: opcode 0x78, selecting
// any_fp_to_uint and X86cvtts2UInt / X86cvtts2UIntSAE.
7872defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7873                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7874                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7875defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7876                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7877                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7878defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7879                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7880                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7881defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7882                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7883                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7884
7885//===----------------------------------------------------------------------===//
7886// AVX-512  Convert from float to double and back
7887//===----------------------------------------------------------------------===//
7888
// Scalar FP <-> FP conversion, non-rounding forms. All records use MXCSR and
// are marked mayRaiseFPException.
//
// Defines:
//   rr_Int / rm_Int - maskable forms on the vector register classes matching
//                     OpNode(src1, src2); rm_Int loads src2 through
//                     _Src.ScalarIntMemFrags.
//   rr / rm         - codegen-only, pattern-less forms on the scalar FP
//                     register classes (_.FRC / _Src.FRC).
//
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _         - destination type info.
//   _Src      - source type info.
//   OpNode    - SDNode matched by the _Int forms.
//   sched     - scheduling class; memory forms use sched.Folded.
7889let Uses = [MXCSR], mayRaiseFPException = 1 in
7890multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7891                                X86VectorVTInfo _Src, SDNode OpNode,
7892                                X86FoldableSchedWrite sched> {
7893  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7894                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7895                         "$src2, $src1", "$src1, $src2",
7896                         (_.VT (OpNode (_.VT _.RC:$src1),
7897                                       (_Src.VT _Src.RC:$src2)))>,
7898                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7899  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7900                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7901                         "$src2, $src1", "$src1, $src2",
7902                         (_.VT (OpNode (_.VT _.RC:$src1),
7903                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7904                         EVEX_4V, VEX_LIG,
7905                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7906
  // No patterns here, so hasSideEffects/mayLoad are set by hand.
7907  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7908    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7909               (ins _.FRC:$src1, _Src.FRC:$src2),
7910               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7911               EVEX_4V, VEX_LIG, Sched<[sched]>;
7912    let mayLoad = 1 in
7913    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7914               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7915               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7916               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7917  }
7918}
7919
7920// Scalar Conversion with SAE - suppress all exceptions
// Defines a single register-register maskable form, rrb_Int, with EVEX_B set.
// Its asm strings carry the literal "{sae}" operand and its pattern is
// (OpNodeSAE $src1, $src2). Only MXCSR use is declared; unlike
// avx512_cvt_fp_scalar, mayRaiseFPException is not set here. No memory form is
// defined, so only the unfolded `sched` is used.
7921multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7922                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7923                                    X86FoldableSchedWrite sched> {
7924  let Uses = [MXCSR] in
7925  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7926                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7927                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7928                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7929                                         (_Src.VT _Src.RC:$src2)))>,
7930                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7931}
7932
7933// Scalar Conversion with rounding control (RC)
// Defines a single register-register maskable form, rrb_Int, that takes an
// extra AVX512RC:$rc operand. The pattern passes it to OpNodeRnd as
// (i32 timm:$rc), and the record is tagged EVEX_B and EVEX_RC. Only MXCSR use
// is declared here. No memory form is defined.
7934multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7935                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7936                                   X86FoldableSchedWrite sched> {
7937  let Uses = [MXCSR] in
7938  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7939                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7940                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7941                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7942                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7943                        EVEX_4V, VEX_LIG, Sched<[sched]>,
7944                        EVEX_B, EVEX_RC;
7945}
// Scalar truncating conversion: one "Z" defm that merges the base forms
// (avx512_cvt_fp_scalar) with the rounding-control form
// (avx512_cvt_fp_rc_scalar).
// Note the argument order: this multiclass takes (_src, _dst), while the inner
// multiclasses take the destination info first, so the two are swapped in the
// calls below. The EVEX_CD8 element size comes from _src.EltSize. `prd` gates
// all records (default HasAVX512).
7946multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7947                                      SDNode OpNode, SDNode OpNodeRnd,
7948                                      X86FoldableSchedWrite sched,
7949                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
7950                                      Predicate prd = HasAVX512> {
7951  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7952    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7953             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7954                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7955  }
7956}
7957
// Scalar extending conversion: one "Z" defm that merges the base forms
// (avx512_cvt_fp_scalar) with the SAE form (avx512_cvt_fp_sae_scalar); no
// rounding-control form is added.
// As in avx512_cvt_fp_scalar_trunc, the parameters are (_src, _dst) and are
// swapped when forwarded. The EVEX_CD8 element size comes from _src.EltSize.
// `prd` gates all records (default HasAVX512).
7958multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7959                                       SDNode OpNode, SDNode OpNodeSAE,
7960                                       X86FoldableSchedWrite sched,
7961                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7962                                       Predicate prd = HasAVX512> {
7963  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7964    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7965             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7966             EVEX_CD8<_src.EltSize, CD8VT1>;
7967  }
7968}
// Scalar conversion instantiations. Narrowing conversions use the *_trunc
// multiclass with X86frounds / X86froundsRnd; widening conversions use the
// *_extend multiclass with X86fpexts / X86fpextsSAE. The type-info arguments
// are given in (source, destination) order.
// f64 <-> f32, under the default HasAVX512 predicate.
7969defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7970                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7971                                         f32x_info>, XD, VEX_W;
7972defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7973                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7974                                          f64x_info>, XS;
// f16 variants, all gated on HasFP16. They reuse the WriteCvtSD2SS /
// WriteCvtSS2SD scheduling classes.
7975defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7976                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7977                                          f16x_info, HasFP16>, T_MAP5XD, VEX_W;
7978defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7979                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7980                                          f64x_info, HasFP16>, T_MAP5XS;
7981defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7982                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7983                                          f16x_info, HasFP16>, T_MAP5PS;
7984defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7985                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7986                                          f32x_info, HasFP16>, T_MAP6PS;
7987
// Selection patterns for plain scalar fpextend / fpround. They target the
// pattern-less rr / rm forms defined by avx512_cvt_fp_scalar. The first
// instruction operand ($src1) has no counterpart in the source DAG and is
// given an IMPLICIT_DEF of the result type. The load-folding (rm) patterns
// additionally require OptForSize.

// f32 -> f64
7988def : Pat<(f64 (any_fpextend FR32X:$src)),
7989          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7990          Requires<[HasAVX512]>;
7991def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7992          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7993          Requires<[HasAVX512, OptForSize]>;
7994
// f64 -> f32 (register form only)
7995def : Pat<(f32 (any_fpround FR64X:$src)),
7996          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7997           Requires<[HasAVX512]>;
7998
// f16 -> f32
7999def : Pat<(f32 (any_fpextend FR16X:$src)),
8000          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
8001          Requires<[HasFP16]>;
8002def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
8003          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
8004          Requires<[HasFP16, OptForSize]>;
8005
// f16 -> f64
8006def : Pat<(f64 (any_fpextend FR16X:$src)),
8007          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
8008          Requires<[HasFP16]>;
8009def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
8010          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
8011          Requires<[HasFP16, OptForSize]>;
8012
// f32 -> f16 and f64 -> f16 (register forms only)
8013def : Pat<(f16 (any_fpround FR32X:$src)),
8014          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
8015           Requires<[HasFP16]>;
8016def : Pat<(f16 (any_fpround FR64X:$src)),
8017          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
8018           Requires<[HasFP16]>;
8019
// Element 0 of $src is converted and merged into $dst through X86Movss /
// X86Movsd. This maps directly onto the vector-register _Int form, with $dst
// as its first operand.
8020def : Pat<(v4f32 (X86Movss
8021                   (v4f32 VR128X:$dst),
8022                   (v4f32 (scalar_to_vector
8023                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
8024          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
8025          Requires<[HasAVX512]>;
8026
8027def : Pat<(v2f64 (X86Movsd
8028                   (v2f64 VR128X:$dst),
8029                   (v2f64 (scalar_to_vector
8030                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
8031          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
8032          Requires<[HasAVX512]>;
8033
8034//===----------------------------------------------------------------------===//
8035// AVX-512  Vector convert from signed/unsigned integer to float/double
8036//          and from float/double to signed/unsigned integer
8037//===----------------------------------------------------------------------===//
8038
// Packed conversion from `_Src` to `_` in three addressing forms. Each form
// hands AVX512_maskable_cvt three operand lists and three patterns: unmasked,
// merge into $src0, and zero via _.ImmAllZerosV.
//   rr  - register source.
//   rm  - full-width memory source of class MemOp; the mnemonic is
//         OpcodeStr#Alias.
//   rmb - broadcast memory source (_Src.ScalarMemOp, EVEX_B); the asm operand
//         is suffixed with Broadcast.
// Parameters:
//   OpNode / MaskOpNode - node used in the unmasked pattern / inside the
//                         vselect_mask patterns.
//   Broadcast           - broadcast decoration string (default _.BroadcastStr).
//   Alias               - mnemonic suffix appended for the rm form only.
//   MemOp               - memory operand class of the rm form.
//   MaskRC              - write-mask register class (default _.KRCWM).
//   LdDAG / MaskLdDAG   - unmasked / masked load patterns of the rm form;
//                         callers may override the plain-load defaults.
// All records use MXCSR and may raise FP exceptions. The memory forms are
// scheduled with sched.Folded only.
8039multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8040                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
8041                          X86FoldableSchedWrite sched,
8042                          string Broadcast = _.BroadcastStr,
8043                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8044                          RegisterClass MaskRC = _.KRCWM,
8045                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
8046                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
8047let Uses = [MXCSR], mayRaiseFPException = 1 in {
8048  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
8049                         (ins _Src.RC:$src),
8050                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
8051                         (ins MaskRC:$mask, _Src.RC:$src),
8052                          OpcodeStr, "$src", "$src",
8053                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
8054                         (vselect_mask MaskRC:$mask,
8055                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8056                                       _.RC:$src0),
8057                         (vselect_mask MaskRC:$mask,
8058                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8059                                       _.ImmAllZerosV)>,
8060                         EVEX, Sched<[sched]>;
8061
8062  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8063                         (ins MemOp:$src),
8064                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8065                         (ins MaskRC:$mask, MemOp:$src),
8066                         OpcodeStr#Alias, "$src", "$src",
8067                         LdDAG,
8068                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
8069                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
8070                         EVEX, Sched<[sched.Folded]>;
8071
8072  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8073                         (ins _Src.ScalarMemOp:$src),
8074                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8075                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8076                         OpcodeStr,
8077                         "${src}"#Broadcast, "${src}"#Broadcast,
8078                         (_.VT (OpNode (_Src.VT
8079                                  (_Src.BroadcastLdFrag addr:$src))
8080                            )),
8081                         (vselect_mask MaskRC:$mask,
8082                                       (_.VT
8083                                        (MaskOpNode
8084                                         (_Src.VT
8085                                          (_Src.BroadcastLdFrag addr:$src)))),
8086                                       _.RC:$src0),
8087                         (vselect_mask MaskRC:$mask,
8088                                       (_.VT
8089                                        (MaskOpNode
8090                                         (_Src.VT
8091                                          (_Src.BroadcastLdFrag addr:$src)))),
8092                                       _.ImmAllZerosV)>,
8093                         EVEX, EVEX_B, Sched<[sched.Folded]>;
8094  }
8095}
8096// Conversion with SAE - suppress all exceptions
// Defines a single register-source form, rrb, through AVX512_maskable. Its asm
// strings carry the literal "{sae}" operand, its pattern is
// (OpNodeSAE $src), and it is tagged EVEX_B. Only MXCSR use is declared;
// mayRaiseFPException is not set here.
8097multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8098                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
8099                              X86FoldableSchedWrite sched> {
8100  let Uses = [MXCSR] in
8101  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8102                        (ins _Src.RC:$src), OpcodeStr,
8103                        "{sae}, $src", "$src, {sae}",
8104                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8105                        EVEX, EVEX_B, Sched<[sched]>;
8106}
8107
8108// Conversion with rounding control (RC)
// Defines a single register-source form, rrb, through AVX512_maskable. It takes
// an extra AVX512RC:$rc operand that the pattern passes to OpNodeRnd as
// (i32 timm:$rc), and it is tagged EVEX_B and EVEX_RC. Only MXCSR use is
// declared here.
8109multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8110                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8111                         X86FoldableSchedWrite sched> {
8112  let Uses = [MXCSR] in
8113  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8114                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8115                        "$rc, $src", "$src, $rc",
8116                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8117                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8118}
8119
8120// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Forwards every argument to avx512_vcvt_fp and replaces both LdDAG and
// MaskLdDAG with the PatFrag named "extload" # _Src.VTName applied to
// addr:$src. The same extload DAG is used for the unmasked and the masked rm
// patterns, so OpNode / MaskOpNode do not appear in the rm load patterns.
8121multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8122                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
8123                                SDNode MaskOpNode,
8124                                X86FoldableSchedWrite sched,
8125                                string Broadcast = _.BroadcastStr,
8126                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8127                                RegisterClass MaskRC = _.KRCWM>
8128  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8129                   Alias, MemOp, MaskRC,
8130                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8131                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8132
8133// Extend [Float to Double, Half to Float]
// The source vector is narrower than the destination:
//   Z    - _src.info256 -> _dst.info512, plus the SAE form (X86vfpextSAE).
//   Z256 - _src.info128 -> _dst.info256.
//   Z128 - _src.info128 -> _dst.info128, using the X86any_vfpext / X86vfpext
//          nodes and an f64mem memory operand.
// Z is gated on `prd`; Z128 and Z256 additionally require HasVLX.
8134multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8135                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8136                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8137  let Predicates = [prd] in {
8138    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
8139                            any_fpextend, fpextend, sched.ZMM>,
8140             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8141                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8142  }
8143  let Predicates = [prd, HasVLX] in {
8144    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8145                               X86any_vfpext, X86vfpext, sched.XMM,
8146                               _dst.info128.BroadcastStr,
8147                               "", f64mem>, EVEX_V128;
8148    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8149                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8150  }
8151}
8152
8153// Truncate [Double to Float, Float to Half]
// The destination vector is narrower than the source:
//   Z    - _src.info512 -> _dst.info256, plus the rounding-control form.
//   Z256 - _src.info256 -> _dst.info128, mnemonic alias suffix "{y}".
//   Z128 - _src.info128 -> _dst.info128, mnemonic alias suffix "{x}"; its
//          instruction patterns are disabled with null_frag and supplied by
//          the explicit Pat records below.
// bcast128 / loadVT128 / maskRC128 are the broadcast-load fragment, the load
// fragment and the write-mask class used for the Z128 form and its patterns;
// they default to the corresponding fields of _src.info128.
// Z is gated on `prd`; Z128 and Z256 additionally require HasVLX.
8154multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8155                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8156                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8157                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8158                            PatFrag loadVT128 = _src.info128.LdFrag,
8159                            RegisterClass maskRC128 = _src.info128.KRCWM> {
8160  let Predicates = [prd] in {
8161    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8162                            X86any_vfpround, X86vfpround, sched.ZMM>,
8163             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8164                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8165  }
8166  let Predicates = [prd, HasVLX] in {
8167    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8168                               null_frag, null_frag, sched.XMM,
8169                               _src.info128.BroadcastStr, "{x}",
8170                               f128mem, maskRC128>, EVEX_V128;
8171    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8172                               X86any_vfpround, X86vfpround,
8173                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8174
8175    // Special patterns to allow use of X86vmfpround for masking. Instruction
8176    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked (rrk), zero-masked (rrkz).
8177    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8178              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8179    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8180                            maskRC128:$mask),
8181              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8182    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8183                            maskRC128:$mask),
8184              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8185
    // Full-width load source (loadVT128).
8186    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8187              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8188    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8189                            maskRC128:$mask),
8190              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8191    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8192                            maskRC128:$mask),
8193              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8194
    // Broadcast load source (bcast128).
8195    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8196              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8197    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8198                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8199              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8200    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8201                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
8202              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8203  }
8204
  // "att"-variant mnemonic aliases with an explicit "x" (Z128) or "y" (Z256)
  // suffix for the register and broadcast forms. These records sit outside
  // the `let Predicates` blocks above.
  // NOTE(review): the alias operands are hard-coded to VK2WM / VK4WM, f64mem
  // and {1to2} / {1to4}, which matches an f64 source. This multiclass is also
  // instantiated with avx512vl_f32_info as _src (VCVTPS2PHX) - confirm the
  // aliases are what is intended for that instantiation.
8205  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8206                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8207  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8208                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8209                  VK2WM:$mask, VR128X:$src), 0, "att">;
8210  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8211                  "$dst {${mask}} {z}, $src}",
8212                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8213                  VK2WM:$mask, VR128X:$src), 0, "att">;
8214  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8215                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8216  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8217                  "$dst {${mask}}, ${src}{1to2}}",
8218                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8219                  VK2WM:$mask, f64mem:$src), 0, "att">;
8220  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8221                  "$dst {${mask}} {z}, ${src}{1to2}}",
8222                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8223                  VK2WM:$mask, f64mem:$src), 0, "att">;
8224
8225  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8226                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8227  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8228                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8229                  VK4WM:$mask, VR256X:$src), 0, "att">;
8230  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8231                  "$dst {${mask}} {z}, $src}",
8232                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8233                  VK4WM:$mask, VR256X:$src), 0, "att">;
8234  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8235                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8236  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8237                  "$dst {${mask}}, ${src}{1to4}}",
8238                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8239                  VK4WM:$mask, f64mem:$src), 0, "att">;
8240  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8241                  "$dst {${mask}} {z}, ${src}{1to4}}",
8242                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8243                  VK4WM:$mask, f64mem:$src), 0, "att">;
8244}
8245
// Packed f64 <-> f32 conversions. Both use opcode 0x5A and the default
// HasAVX512 predicate. The type-info arguments are given in
// (destination, source) order.
8246defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8247                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8248                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
8249defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8250                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8251                                   PS, EVEX_CD8<32, CD8VH>;
8252
8253// Extend Half to Double
// Every width uses v8f16x_info as the source info; the destination is
// v8f64_info (Z), v4f64x_info (Z256) or v2f64x_info (Z128).
//   Z    - any_fpextend / fpextend plus the SAE form; gated on HasFP16.
//   Z128 - X86any_vfpext / X86vfpext, broadcast "{1to2}", memory operand f32mem.
//   Z256 - X86any_vfpext / X86vfpext, broadcast "{1to4}", memory operand f64mem.
// Z128 and Z256 additionally require HasVLX.
8254multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8255                            X86SchedWriteWidths sched> {
8256  let Predicates = [HasFP16] in {
8257    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8258                                  any_fpextend, fpextend, sched.ZMM>,
8259             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8260                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    // Explicit selection of the Zrm form for a v8f16 extending load that
    // yields v8f64.
8261    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8262                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8263  }
8264  let Predicates = [HasFP16, HasVLX] in {
8265    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8266                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8267                                     f32mem>, EVEX_V128;
8268    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8269                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8270                                     f64mem>, EVEX_V256;
8271  }
8272}
8273
8274// Truncate Double to Half
// Every width uses v8f16x_info as the destination info; the source is
// v8f64_info (Z), v4f64x_info (Z256) or v2f64x_info (Z128).
//   Z    - X86any_vfpround / X86vfpround plus the rounding-control form;
//          broadcast "{1to8}", mnemonic alias suffix "{z}"; gated on HasFP16.
//   Z128 - patterns disabled (null_frag); "{1to2}", "{x}", f128mem, VK2WM.
//   Z256 - patterns disabled (null_frag); "{1to4}", "{y}", f256mem, VK4WM.
// Z128 and Z256 additionally require HasVLX. Their write-mask classes are
// passed explicitly instead of using the _.KRCWM default of the destination
// info; selection patterns for them are not defined in this multiclass.
8275multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8276  let Predicates = [HasFP16] in {
8277    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8278                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8279             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8280                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8281  }
8282  let Predicates = [HasFP16, HasVLX] in {
8283    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8284                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8285                               VK2WM>, EVEX_V128;
8286    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8287                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8288                               VK4WM>, EVEX_V256;
8289  }
  // "att"-variant mnemonic aliases with an explicit width suffix for the
  // register and broadcast forms: "x" -> Z128, "y" -> Z256, "z" -> Z.
  // The destination is VR128X at every width and the broadcast memory operand
  // is i64mem.
8290  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8291                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8292                  VR128X:$src), 0, "att">;
8293  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8294                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8295                  VK2WM:$mask, VR128X:$src), 0, "att">;
8296  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8297                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8298                  VK2WM:$mask, VR128X:$src), 0, "att">;
8299  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8300                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8301                  i64mem:$src), 0, "att">;
8302  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8303                  "$dst {${mask}}, ${src}{1to2}}",
8304                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8305                  VK2WM:$mask, i64mem:$src), 0, "att">;
8306  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8307                  "$dst {${mask}} {z}, ${src}{1to2}}",
8308                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8309                  VK2WM:$mask, i64mem:$src), 0, "att">;
8310
8311  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8312                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8313                  VR256X:$src), 0, "att">;
8314  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8315                  "$dst {${mask}}, $src}",
8316                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8317                  VK4WM:$mask, VR256X:$src), 0, "att">;
8318  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8319                  "$dst {${mask}} {z}, $src}",
8320                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8321                  VK4WM:$mask, VR256X:$src), 0, "att">;
8322  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8323                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8324                  i64mem:$src), 0, "att">;
8325  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8326                  "$dst {${mask}}, ${src}{1to4}}",
8327                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8328                  VK4WM:$mask, i64mem:$src), 0, "att">;
8329  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8330                  "$dst {${mask}} {z}, ${src}{1to4}}",
8331                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8332                  VK4WM:$mask, i64mem:$src), 0, "att">;
8333
8334  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8335                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8336                  VR512:$src), 0, "att">;
8337  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8338                  "$dst {${mask}}, $src}",
8339                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8340                  VK8WM:$mask, VR512:$src), 0, "att">;
8341  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8342                  "$dst {${mask}} {z}, $src}",
8343                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8344                  VK8WM:$mask, VR512:$src), 0, "att">;
8345  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8346                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8347                  i64mem:$src), 0, "att">;
8348  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8349                  "$dst {${mask}}, ${src}{1to8}}",
8350                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8351                  VK8WM:$mask, i64mem:$src), 0, "att">;
8352  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8353                  "$dst {${mask}} {z}, ${src}{1to8}}",
8354                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8355                  VK8WM:$mask, i64mem:$src), 0, "att">;
8356}
8357
// Packed FP16 conversion instantiations.
// f32 <-> f16 reuse the generic trunc / extend multiclasses with HasFP16 as
// the predicate. The type-info arguments are given in (destination, source)
// order.
8358defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8359                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8360                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8361defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8362                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8363                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
// f64 <-> f16 use the dedicated multiclasses, which hard-code their type infos.
8364defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8365                                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8366defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8367                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8368
// Selection patterns for the 128-bit and 256-bit VCVTPD2PH forms. Each source
// kind (register, full load, 64-bit broadcast load) gets three patterns:
// unmasked via X86any_vfpround, merge-masked into $src0 via X86vmfpround, and
// zero-masked via X86vmfpround with v8f16x_info.ImmAllZerosV.
8369let Predicates = [HasFP16, HasVLX] in {
8370  // Special patterns to allow use of X86vmfpround for masking. Instruction
8371  // patterns have been disabled with null_frag.
  // v4f64 source (Z256 forms, VK4WM mask): register.
8372  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8373            (VCVTPD2PHZ256rr VR256X:$src)>;
8374  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8375                          VK4WM:$mask)),
8376            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8377  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8378                          VK4WM:$mask),
8379            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8380
  // v4f64 source: full-width load.
8381  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8382            (VCVTPD2PHZ256rm addr:$src)>;
8383  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8384                          VK4WM:$mask),
8385            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8386  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8387                          VK4WM:$mask),
8388            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8389
  // v4f64 source: 64-bit broadcast load.
8390  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8391            (VCVTPD2PHZ256rmb addr:$src)>;
8392  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8393                          (v8f16 VR128X:$src0), VK4WM:$mask),
8394            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8395  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8396                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8397            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8398
  // v2f64 source (Z128 forms, VK2WM mask): register.
8399  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8400            (VCVTPD2PHZ128rr VR128X:$src)>;
8401  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8402                          VK2WM:$mask),
8403            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8404  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8405                          VK2WM:$mask),
8406            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8407
  // v2f64 source: full-width load.
8408  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8409            (VCVTPD2PHZ128rm addr:$src)>;
8410  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8411                          VK2WM:$mask),
8412            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8413  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8414                          VK2WM:$mask),
8415            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8416
  // v2f64 source: 64-bit broadcast load.
8417  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8418            (VCVTPD2PHZ128rmb addr:$src)>;
8419  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8420                          (v8f16 VR128X:$src0), VK2WM:$mask),
8421            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8422  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8423                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8424            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8425}
8426
8427// Convert Signed/Unsigned Doubleword to Double
// The i32 source vector is narrower than the f64 destination:
//   Z    - v8i32x_info -> v8f64_info, gated on HasAVX512.
//   Z256 - v4i32x_info -> v4f64x_info, gated on HasVLX.
//   Z128 - v4i32x_info -> v2f64x_info, gated on HasVLX, using the separate
//          OpNode128 / MaskOpNode128 nodes.
// The enclosing `let` sets Uses to an empty register list and
// mayRaiseFPException to 0; presumably this is meant to override the MXCSR /
// FP-exception settings applied inside avx512_vcvt_fp, in line with the
// "No rounding in this op" note below.
8428let Uses = []<Register>, mayRaiseFPException = 0 in
8429multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8430                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8431                           SDNode MaskOpNode128,
8432                           X86SchedWriteWidths sched> {
8433  // No rounding in this op
8434  let Predicates = [HasAVX512] in
8435    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8436                            MaskOpNode, sched.ZMM>, EVEX_V512;
8437
8438  let Predicates = [HasVLX] in {
    // The Z128 memory form uses an i64mem operand. Its load patterns are
    // replaced by an i64 scalar load placed in a v2i64 (scalar_to_vector) and
    // bitcast to v4i32.
8439    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8440                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8441                               "", i64mem, VK2WM,
8442                               (v2f64 (OpNode128 (bc_v4i32
8443                                (v2i64
8444                                 (scalar_to_vector (loadi64 addr:$src)))))),
8445                               (v2f64 (MaskOpNode128 (bc_v4i32
8446                                (v2i64
8447                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8448                               EVEX_V128;
8449    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8450                               MaskOpNode, sched.YMM>, EVEX_V256;
8451  }
8452}
8453
8454// Convert Signed/Unsigned Doubleword to Float
// Z (v16i32 -> v16f32) is gated on HasAVX512 and additionally mixes in
// avx512_vcvt_fp_rc driven by OpNodeRnd; Z128 / Z256 are gated on HasVLX and
// use avx512_vcvt_fp only. avx512_vcvt_fp_rc is defined outside this section
// (presumably the embedded-rounding form -- confirm at its definition).
8455multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8456                           SDNode MaskOpNode, SDNode OpNodeRnd,
8457                           X86SchedWriteWidths sched> {
8458  let Predicates = [HasAVX512] in
8459    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8460                            MaskOpNode, sched.ZMM>,
8461             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8462                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8463
8464  let Predicates = [HasVLX] in {
8465    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8466                               MaskOpNode, sched.XMM>, EVEX_V128;
8467    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8468                               MaskOpNode, sched.YMM>, EVEX_V256;
8469  }
8470}
8471
8472// Convert Float to Signed/Unsigned Doubleword with truncation
// Z (v16f32 -> v16i32) is gated on HasAVX512 and additionally mixes in
// avx512_vcvt_fp_sae driven by OpNodeSAE; Z128 / Z256 are gated on HasVLX and
// use avx512_vcvt_fp only. avx512_vcvt_fp_sae is defined outside this section.
8473multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8474                            SDNode MaskOpNode,
8475                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8476  let Predicates = [HasAVX512] in {
8477    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8478                            MaskOpNode, sched.ZMM>,
8479             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8480                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8481  }
8482  let Predicates = [HasVLX] in {
8483    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8484                               MaskOpNode, sched.XMM>, EVEX_V128;
8485    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8486                               MaskOpNode, sched.YMM>, EVEX_V256;
8487  }
8488}
8489
8490// Convert Float to Signed/Unsigned Doubleword
// Same layout as the truncating variant, except that the 512-bit form mixes in
// avx512_vcvt_fp_rc (driven by OpNodeRnd) rather than avx512_vcvt_fp_sae, and
// OpNode is typed SDNode rather than SDPatternOperator.
8491multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8492                           SDNode MaskOpNode, SDNode OpNodeRnd,
8493                           X86SchedWriteWidths sched> {
8494  let Predicates = [HasAVX512] in {
8495    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8496                            MaskOpNode, sched.ZMM>,
8497             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8498                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8499  }
8500  let Predicates = [HasVLX] in {
8501    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8502                               MaskOpNode, sched.XMM>, EVEX_V128;
8503    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8504                               MaskOpNode, sched.YMM>, EVEX_V256;
8505  }
8506}
8507
8508// Convert Double to Signed/Unsigned Doubleword with truncation
// Z (v8f64 -> v8i32x) is gated on HasAVX512 and mixes in avx512_vcvt_fp_sae
// driven by OpNodeSAE. Z128 and Z256 (gated on HasVLX) both produce
// v4i32x_info. Z128 is instantiated with null_frag for both nodes, so this
// multiclass attaches no selection nodes to it. The trailing InstAlias records
// add explicitly suffixed "x" / "y" spellings of the mnemonic.
8509multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8510                            SDNode MaskOpNode, SDNode OpNodeSAE,
8511                            X86SchedWriteWidths sched> {
8512  let Predicates = [HasAVX512] in {
8513    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8514                            MaskOpNode, sched.ZMM>,
8515             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8516                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8517  }
8518  let Predicates = [HasVLX] in {
8519    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8520    // memory forms of these instructions in Asm Parser. They have the same
8521    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8522    // due to the same reason.
8523    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8524                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8525                               VK2WM>, EVEX_V128;
8526    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8527                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8528  }
8529
  // "x"-suffixed aliases for the Z128 register (rr/rrk/rrkz) and broadcast
  // (rmb/rmbk/rmbkz) forms. All are limited to the "att" variant.
8530  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8531                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8532                  VR128X:$src), 0, "att">;
8533  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8534                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8535                  VK2WM:$mask, VR128X:$src), 0, "att">;
8536  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8537                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8538                  VK2WM:$mask, VR128X:$src), 0, "att">;
8539  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8540                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8541                  f64mem:$src), 0, "att">;
8542  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8543                  "$dst {${mask}}, ${src}{1to2}}",
8544                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8545                  VK2WM:$mask, f64mem:$src), 0, "att">;
8546  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8547                  "$dst {${mask}} {z}, ${src}{1to2}}",
8548                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8549                  VK2WM:$mask, f64mem:$src), 0, "att">;
8550
  // "y"-suffixed aliases for the corresponding Z256 forms.
8551  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8552                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8553                  VR256X:$src), 0, "att">;
8554  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8555                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8556                  VK4WM:$mask, VR256X:$src), 0, "att">;
8557  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8558                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8559                  VK4WM:$mask, VR256X:$src), 0, "att">;
8560  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8561                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8562                  f64mem:$src), 0, "att">;
8563  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8564                  "$dst {${mask}}, ${src}{1to4}}",
8565                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8566                  VK4WM:$mask, f64mem:$src), 0, "att">;
8567  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8568                  "$dst {${mask}} {z}, ${src}{1to4}}",
8569                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8570                  VK4WM:$mask, f64mem:$src), 0, "att">;
8571}
8572
8573// Convert Double to Signed/Unsigned Doubleword
// Z (v8f64 -> v8i32x) is gated on HasAVX512 and mixes in avx512_vcvt_fp_rc
// driven by OpNodeRnd. Z128 and Z256 (gated on HasVLX) both produce
// v4i32x_info. Z128 is instantiated with null_frag for both nodes, so this
// multiclass attaches no selection nodes to it. The trailing InstAlias records
// add explicitly suffixed "x" / "y" spellings of the mnemonic.
8574multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8575                           SDNode MaskOpNode, SDNode OpNodeRnd,
8576                           X86SchedWriteWidths sched> {
8577  let Predicates = [HasAVX512] in {
8578    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8579                            MaskOpNode, sched.ZMM>,
8580             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8581                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8582  }
8583  let Predicates = [HasVLX] in {
8584    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8585    // memory forms of these instructions in Asm Parser. They have the same
8586    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8587    // due to the same reason.
8588    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8589                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8590                               VK2WM>, EVEX_V128;
8591    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8592                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8593  }
8594
  // "x"-suffixed aliases for the Z128 register (rr/rrk/rrkz) and broadcast
  // (rmb/rmbk/rmbkz) forms. All are limited to the "att" variant.
8595  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8596                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8597  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8598                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8599                  VK2WM:$mask, VR128X:$src), 0, "att">;
8600  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8601                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8602                  VK2WM:$mask, VR128X:$src), 0, "att">;
8603  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8604                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8605                  f64mem:$src), 0, "att">;
8606  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8607                  "$dst {${mask}}, ${src}{1to2}}",
8608                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8609                  VK2WM:$mask, f64mem:$src), 0, "att">;
8610  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8611                  "$dst {${mask}} {z}, ${src}{1to2}}",
8612                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8613                  VK2WM:$mask, f64mem:$src), 0, "att">;
8614
  // "y"-suffixed aliases for the corresponding Z256 forms.
8615  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8616                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8617  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8618                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8619                  VK4WM:$mask, VR256X:$src), 0, "att">;
8620  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8621                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8622                  VK4WM:$mask, VR256X:$src), 0, "att">;
8623  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8624                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8625                  f64mem:$src), 0, "att">;
8626  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8627                  "$dst {${mask}}, ${src}{1to4}}",
8628                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8629                  VK4WM:$mask, f64mem:$src), 0, "att">;
8630  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8631                  "$dst {${mask}} {z}, ${src}{1to4}}",
8632                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8633                  VK4WM:$mask, f64mem:$src), 0, "att">;
8634}
8635
8636// Convert Double to Signed/Unsigned Quadword
// Z (v8f64 -> v8i64) is gated on HasDQI and mixes in avx512_vcvt_fp_rc driven
// by OpNodeRnd; Z128 / Z256 require both HasDQI and HasVLX and use
// avx512_vcvt_fp only.
8637multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8638                           SDNode MaskOpNode, SDNode OpNodeRnd,
8639                           X86SchedWriteWidths sched> {
8640  let Predicates = [HasDQI] in {
8641    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8642                            MaskOpNode, sched.ZMM>,
8643             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8644                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8645  }
8646  let Predicates = [HasDQI, HasVLX] in {
8647    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8648                               MaskOpNode, sched.XMM>, EVEX_V128;
8649    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8650                               MaskOpNode, sched.YMM>, EVEX_V256;
8651  }
8652}
8653
8654// Convert Double to Signed/Unsigned Quadword with truncation
// Z (v8f64 -> v8i64) is gated on HasDQI; Z128 / Z256 require both HasDQI and
// HasVLX. Note that, despite its name, the OpNodeRnd parameter is forwarded to
// avx512_vcvt_fp_sae here (not to avx512_vcvt_fp_rc).
8655multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8656                            SDNode MaskOpNode, SDNode OpNodeRnd,
8657                            X86SchedWriteWidths sched> {
8658  let Predicates = [HasDQI] in {
8659    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8660                            MaskOpNode, sched.ZMM>,
8661             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8662                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8663  }
8664  let Predicates = [HasDQI, HasVLX] in {
8665    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8666                               MaskOpNode, sched.XMM>, EVEX_V128;
8667    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8668                               MaskOpNode, sched.YMM>, EVEX_V256;
8669  }
8670}
8671
8672// Convert Signed/Unsigned Quadword to Double
// Z (v8i64 -> v8f64) is gated on HasDQI and mixes in avx512_vcvt_fp_rc driven
// by OpNodeRnd; Z128 / Z256 require both HasDQI and HasVLX and are additionally
// tagged NotEVEX2VEXConvertible.
8673multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8674                           SDNode MaskOpNode, SDNode OpNodeRnd,
8675                           X86SchedWriteWidths sched> {
8676  let Predicates = [HasDQI] in {
8677    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8678                            MaskOpNode, sched.ZMM>,
8679             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8680                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8681  }
8682  let Predicates = [HasDQI, HasVLX] in {
8683    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8684                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8685    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8686                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8687  }
8688}
8689
8690// Convert Float to Signed/Unsigned Quadword
// Z (v8f32x -> v8i64) is gated on HasDQI and mixes in avx512_vcvt_fp_rc driven
// by OpNodeRnd; Z128 / Z256 require both HasDQI and HasVLX. The Z128 form uses
// an f64mem operand and custom load patterns that match a loadf64 placed in a
// vector with scalar_to_vector and bitcast to v4f32.
8691multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8692                           SDNode MaskOpNode, SDNode OpNodeRnd,
8693                           X86SchedWriteWidths sched> {
8694  let Predicates = [HasDQI] in {
8695    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8696                            MaskOpNode, sched.ZMM>,
8697             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8698                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8699  }
8700  let Predicates = [HasDQI, HasVLX] in {
8701    // Explicitly specified broadcast string, since we take only 2 elements
8702    // from v4f32x_info source
8703    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8704                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8705                               (v2i64 (OpNode (bc_v4f32
8706                                (v2f64
8707                                 (scalar_to_vector (loadf64 addr:$src)))))),
8708                               (v2i64 (MaskOpNode (bc_v4f32
8709                                (v2f64
8710                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8711                               EVEX_V128;
8712    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8713                               MaskOpNode, sched.YMM>, EVEX_V256;
8714  }
8715}
8716
8717// Convert Float to Signed/Unsigned Quadword with truncation
// Z (v8f32x -> v8i64) is gated on HasDQI; Z128 / Z256 require both HasDQI and
// HasVLX. Note that, despite its name, the OpNodeRnd parameter is forwarded to
// avx512_vcvt_fp_sae here. The Z128 form uses an f64mem operand and custom
// load patterns that match a loadf64 placed in a vector with scalar_to_vector
// and bitcast to v4f32.
8718multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8719                            SDNode MaskOpNode, SDNode OpNodeRnd,
8720                            X86SchedWriteWidths sched> {
8721  let Predicates = [HasDQI] in {
8722    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8723                            MaskOpNode, sched.ZMM>,
8724             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8725                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8726  }
8727  let Predicates = [HasDQI, HasVLX] in {
8728    // Explicitly specified broadcast string, since we take only 2 elements
8729    // from v4f32x_info source
8730    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8731                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8732                               (v2i64 (OpNode (bc_v4f32
8733                                (v2f64
8734                                 (scalar_to_vector (loadf64 addr:$src)))))),
8735                               (v2i64 (MaskOpNode (bc_v4f32
8736                                (v2f64
8737                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8738                               EVEX_V128;
8739    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8740                               MaskOpNode, sched.YMM>, EVEX_V256;
8741  }
8742}
8743
8744// Convert Signed/Unsigned Quadword to Float
8745// Also Convert Signed/Unsigned Doubleword to Half
// Parameterized on the destination / source type families (_dst, _src) and on
// the gating predicate prd (default HasDQI). Z converts _src.info512 to
// _dst.info256; Z128 and Z256 both produce _dst.info128. Z128 is instantiated
// with null_frag, and its selection patterns are supplied by the explicit Pat
// records below using OpNode128 (unmasked) and OpNode128M (masked).
8746multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8747                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8748                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8749                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8750                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8751  let Predicates = [prd] in {
8752    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8753                            MaskOpNode, sched.ZMM>,
8754             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8755                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8756  }
8757  let Predicates = [prd, HasVLX] in {
8758    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8759    // memory forms of these instructions in Asm Parser. They have the same
8760    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8761    // due to the same reason.
8762    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8763                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8764                               "{x}", i128mem, _src.info128.KRCWM>,
8765                               EVEX_V128, NotEVEX2VEXConvertible;
8766    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8767                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8768                               "{y}">, EVEX_V256,
8769                               NotEVEX2VEXConvertible;
8770
8771    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8772    // patterns have been disabled with null_frag.
    // Register-source forms: unmasked, merge-masked ($src0 passthrough) and
    // zero-masked.
8773    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8774              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8775    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8776                             _src.info128.KRCWM:$mask),
8777              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8778    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8779                             _src.info128.KRCWM:$mask),
8780              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8781
    // Full-vector load forms.
8782    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8783              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8784    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8785                             _src.info128.KRCWM:$mask),
8786              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8787    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8788                             _src.info128.KRCWM:$mask),
8789              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8790
    // Broadcast-load forms.
    // NOTE(review): these match X86VBroadcastld64 regardless of the element
    // type of _src -- confirm this is intended when _src has 32-bit elements
    // (the doubleword-to-half use of this multiclass).
8791    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8792              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8793    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8794                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8795              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8796    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8797                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8798              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8799  }
8800
  // "x"-suffixed aliases for the Z128 register and broadcast forms, limited to
  // the "att" variant.
  // NOTE(review): the aliases in this multiclass hard-code VK2WM / VK4WM,
  // i64mem and {1to2} / {1to4}, whereas the instruction definitions above take
  // the mask class and broadcast string from _src. Confirm these are the
  // intended operands when _src has 32-bit elements.
8801  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8802                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8803                  VR128X:$src), 0, "att">;
8804  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8805                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8806                  VK2WM:$mask, VR128X:$src), 0, "att">;
8807  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8808                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8809                  VK2WM:$mask, VR128X:$src), 0, "att">;
8810  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8811                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8812                  i64mem:$src), 0, "att">;
8813  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8814                  "$dst {${mask}}, ${src}{1to2}}",
8815                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8816                  VK2WM:$mask, i64mem:$src), 0, "att">;
8817  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8818                  "$dst {${mask}} {z}, ${src}{1to2}}",
8819                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8820                  VK2WM:$mask, i64mem:$src), 0, "att">;
8821
  // "y"-suffixed aliases for the corresponding Z256 forms.
8822  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8823                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8824                  VR256X:$src), 0, "att">;
8825  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8826                  "$dst {${mask}}, $src}",
8827                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8828                  VK4WM:$mask, VR256X:$src), 0, "att">;
8829  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8830                  "$dst {${mask}} {z}, $src}",
8831                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8832                  VK4WM:$mask, VR256X:$src), 0, "att">;
8833  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8834                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8835                  i64mem:$src), 0, "att">;
8836  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8837                  "$dst {${mask}}, ${src}{1to4}}",
8838                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8839                  VK4WM:$mask, i64mem:$src), 0, "att">;
8840  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8841                  "$dst {${mask}} {z}, ${src}{1to4}}",
8842                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8843                  VK4WM:$mask, i64mem:$src), 0, "att">;
8844}
8845
// Instantiations of the doubleword conversion multiclasses. Each defm passes
// the opcode byte, the assembly mnemonic, the DAG nodes expected by its
// multiclass and a scheduling-class bundle. The classes listed after the
// closing '>' (XS / XD / PS / PD, VEX_W, EVEX_CD8<...>) are mixed into every
// generated record; they are defined outside this section.
8846defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8847                                 X86any_VSintToFP, X86VSintToFP,
8848                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8849
8850defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8851                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8852                                PS, EVEX_CD8<32, CD8VF>;
8853
8854defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8855                                 X86cvttp2si, X86cvttp2siSAE,
8856                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8857
8858defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8859                                 X86cvttp2si, X86cvttp2siSAE,
8860                                 SchedWriteCvtPD2DQ>,
8861                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8862
8863defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8864                                 X86cvttp2ui, X86cvttp2uiSAE,
8865                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8866
8867defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8868                                 X86cvttp2ui, X86cvttp2uiSAE,
8869                                 SchedWriteCvtPD2DQ>,
8870                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8871
8872defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8873                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8874                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8875
8876defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8877                                 uint_to_fp, X86VUintToFpRnd,
8878                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8879
// The non-truncating float/double-to-integer forms pass the same node for both
// OpNode and MaskOpNode.
8880defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8881                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8882                                 EVEX_CD8<32, CD8VF>;
8883
8884defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8885                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8886                                 VEX_W, EVEX_CD8<64, CD8VF>;
8887
8888defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8889                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8890                                 PS, EVEX_CD8<32, CD8VF>;
8891
8892defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8893                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8894                                 PS, EVEX_CD8<64, CD8VF>;
8895
// Instantiations of the quadword conversion multiclasses (float/double to
// quadword, with and without truncation, and quadword to double). The
// double-source and quadword-source forms add VEX_W and use
// EVEX_CD8<64, CD8VF>; the float-source forms use EVEX_CD8<32, CD8VH>.
8896defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8897                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8898                                 PD, EVEX_CD8<64, CD8VF>;
8899
8900defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8901                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8902                                 EVEX_CD8<32, CD8VH>;
8903
8904defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8905                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8906                                 PD, EVEX_CD8<64, CD8VF>;
8907
8908defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8909                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8910                                 EVEX_CD8<32, CD8VH>;
8911
8912defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8913                                 X86cvttp2si, X86cvttp2siSAE,
8914                                 SchedWriteCvtPD2DQ>, VEX_W,
8915                                 PD, EVEX_CD8<64, CD8VF>;
8916
8917defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8918                                 X86cvttp2si, X86cvttp2siSAE,
8919                                 SchedWriteCvtPS2DQ>, PD,
8920                                 EVEX_CD8<32, CD8VH>;
8921
8922defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8923                                 X86cvttp2ui, X86cvttp2uiSAE,
8924                                 SchedWriteCvtPD2DQ>, VEX_W,
8925                                 PD, EVEX_CD8<64, CD8VF>;
8926
8927defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8928                                 X86cvttp2ui, X86cvttp2uiSAE,
8929                                 SchedWriteCvtPS2DQ>, PD,
8930                                 EVEX_CD8<32, CD8VH>;
8931
8932defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8933                            sint_to_fp, X86VSintToFpRnd,
8934                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8935
8936defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8937                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8938                            VEX_W, XS, EVEX_CD8<64, CD8VF>;
8939
// Instantiations of avx512_cvtqq2ps_dq2ph. The doubleword-to-half forms pass
// avx512vl_f16_info / avx512vl_i32_info and override the predicate with
// HasFP16; the quadword-to-float forms pass avx512vl_f32_info /
// avx512vl_i64_info, keep the default predicate and add VEX_W.
8940defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8941                            X86any_VSintToFP, X86VMSintToFP,
8942                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8943                            SchedWriteCvtDQ2PS, HasFP16>,
8944                            T_MAP5PS, EVEX_CD8<32, CD8VF>;
8945
8946defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8947                            X86any_VUintToFP, X86VMUintToFP,
8948                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8949                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8950                            EVEX_CD8<32, CD8VF>;
8951
8952defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8953                            X86any_VSintToFP, X86VMSintToFP,
8954                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8955                            SchedWriteCvtDQ2PS>, VEX_W, PS,
8956                            EVEX_CD8<64, CD8VF>;
8957
8958defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8959                            X86any_VUintToFP, X86VMUintToFP,
8960                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8961                            SchedWriteCvtDQ2PS>, VEX_W, XD,
8962                            EVEX_CD8<64, CD8VF>;
8963
// 128-bit (VLX) selection patterns for the v2f64 -> v4i32 conversions
// VCVTPD2DQ, VCVTTPD2DQ, VCVTPD2UDQ and VCVTTPD2UDQ. Each of the four groups
// covers a register source, a full v2f64 load and a 64-bit broadcast load,
// each in unmasked, merge-masked ($src0 is the passthrough) and zero-masked
// form. The masked forms use the dedicated X86m* nodes with a VK2WM mask.
8964let Predicates = [HasVLX] in {
8965  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8966  // patterns have been disabled with null_frag.
8967  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8968            (VCVTPD2DQZ128rr VR128X:$src)>;
8969  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8970                          VK2WM:$mask),
8971            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8972  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8973                          VK2WM:$mask),
8974            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8975
8976  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8977            (VCVTPD2DQZ128rm addr:$src)>;
8978  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8979                          VK2WM:$mask),
8980            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8981  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8982                          VK2WM:$mask),
8983            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8984
8985  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8986            (VCVTPD2DQZ128rmb addr:$src)>;
8987  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8988                          (v4i32 VR128X:$src0), VK2WM:$mask),
8989            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8990  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8991                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8992            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8993
8994  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8995  // patterns have been disabled with null_frag.
8996  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8997            (VCVTTPD2DQZ128rr VR128X:$src)>;
8998  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8999                          VK2WM:$mask),
9000            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9001  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9002                          VK2WM:$mask),
9003            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9004
9005  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
9006            (VCVTTPD2DQZ128rm addr:$src)>;
9007  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9008                          VK2WM:$mask),
9009            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9010  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9011                          VK2WM:$mask),
9012            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
9013
9014  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
9015            (VCVTTPD2DQZ128rmb addr:$src)>;
9016  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9017                          (v4i32 VR128X:$src0), VK2WM:$mask),
9018            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9019  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9020                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9021            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
9022
9023  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
9024  // patterns have been disabled with null_frag.
9025  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
9026            (VCVTPD2UDQZ128rr VR128X:$src)>;
9027  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9028                           VK2WM:$mask),
9029            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9030  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9031                           VK2WM:$mask),
9032            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9033
9034  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
9035            (VCVTPD2UDQZ128rm addr:$src)>;
9036  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9037                           VK2WM:$mask),
9038            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9039  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9040                           VK2WM:$mask),
9041            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9042
9043  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
9044            (VCVTPD2UDQZ128rmb addr:$src)>;
9045  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9046                           (v4i32 VR128X:$src0), VK2WM:$mask),
9047            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9048  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9049                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9050            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9051
9052  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
9053  // patterns have been disabled with null_frag.
9054  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
9055            (VCVTTPD2UDQZ128rr VR128X:$src)>;
9056  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9057                          VK2WM:$mask),
9058            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9059  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9060                          VK2WM:$mask),
9061            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9062
9063  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
9064            (VCVTTPD2UDQZ128rm addr:$src)>;
9065  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9066                          VK2WM:$mask),
9067            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9068  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9069                          VK2WM:$mask),
9070            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9071
9072  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
9073            (VCVTTPD2UDQZ128rmb addr:$src)>;
9074  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9075                          (v4i32 VR128X:$src0), VK2WM:$mask),
9076            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9077  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9078                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9079            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9080}
9081
// With DQI and VLX: fold a 64-bit zero-extending load (X86vzload64, bitcast
// to v4f32) that feeds a v2i64-producing conversion into the memory forms of
// VCVTPS2QQ, VCVTPS2UQQ, VCVTTPS2QQ and VCVTTPS2UQQ. Each instruction gets an
// unmasked pattern plus vselect_mask-based merge-masked ($src0 passthrough)
// and zero-masked patterns. The unmasked truncating patterns match the
// X86any_* nodes; their masked patterns match the plain X86cvttp2si /
// X86cvttp2ui nodes.
9082let Predicates = [HasDQI, HasVLX] in {
9083  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9084            (VCVTPS2QQZ128rm addr:$src)>;
9085  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9086                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9087                                 VR128X:$src0)),
9088            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9089  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9090                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9091                                 v2i64x_info.ImmAllZerosV)),
9092            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9093
9094  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9095            (VCVTPS2UQQZ128rm addr:$src)>;
9096  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9097                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9098                                 VR128X:$src0)),
9099            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9100  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9101                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9102                                 v2i64x_info.ImmAllZerosV)),
9103            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9104
9105  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9106            (VCVTTPS2QQZ128rm addr:$src)>;
9107  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9108                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9109                                 VR128X:$src0)),
9110            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9111  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9112                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9113                                 v2i64x_info.ImmAllZerosV)),
9114            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9115
9116  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9117            (VCVTTPS2UQQZ128rm addr:$src)>;
9118  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9119                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9120                                 VR128X:$src0)),
9121            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9122  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9123                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9124                                 v2i64x_info.ImmAllZerosV)),
9125            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9126}
9127
// With VLX: fold a 64-bit zero-extending load (X86vzload64, bitcast to v4i32)
// that feeds a v2f64-producing int-to-fp conversion into the memory forms of
// VCVTDQ2PD (signed) and VCVTUDQ2PD (unsigned). The unmasked patterns match
// the X86any_* nodes; the vselect_mask-based merge-masked and zero-masked
// patterns match the plain X86VSintToFP / X86VUintToFP nodes.
9128let Predicates = [HasVLX] in {
9129  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9130            (VCVTDQ2PDZ128rm addr:$src)>;
9131  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9132                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9133                                 VR128X:$src0)),
9134            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9135  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9136                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9137                                 v2f64x_info.ImmAllZerosV)),
9138            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9139
9140  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9141            (VCVTUDQ2PDZ128rm addr:$src)>;
9142  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9143                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9144                                 VR128X:$src0)),
9145            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9146  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9147                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9148                                 v2f64x_info.ImmAllZerosV)),
9149            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9150}
9151
9152//===----------------------------------------------------------------------===//
9153// Half precision conversion instructions
9154//===----------------------------------------------------------------------===//
9155
// avx512_cvtph2ps - register (rr) and memory (rm) forms of vcvtph2ps at
// opcode 0x13.
//   _dest    - type info of the result vector.
//   _src     - type info of the source vector.
//   x86memop - memory operand class used by the rm form.
//   ld_dag   - DAG that stands for the loaded source in the rm patterns.
//   sched    - scheduling class; rm uses sched.Folded.
// Both forms are marked as reading MXCSR and as possibly raising an FP
// exception. Each form passes two patterns to AVX512_maskable_split: one on
// X86any_cvtph2ps and one on X86cvtph2ps.
// NOTE(review): presumably the first is the unmasked pattern and the second
// the masked one - confirm against AVX512_maskable_split.
9156let Uses = [MXCSR], mayRaiseFPException = 1 in
9157multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9158                           X86MemOperand x86memop, dag ld_dag,
9159                           X86FoldableSchedWrite sched> {
9160  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
9161                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9162                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9163                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
9164                            T8PD, Sched<[sched]>;
9165  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9166                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9167                            (X86any_cvtph2ps (_src.VT ld_dag)),
9168                            (X86cvtph2ps (_src.VT ld_dag))>,
9169                            T8PD, Sched<[sched.Folded]>;
9170}
9171
// avx512_cvtph2ps_sae - register-only {sae} form (rrb) of vcvtph2ps at opcode
// 0x13, selected from X86cvtph2psSAE and encoded with EVEX_B. It is marked as
// reading MXCSR; unlike avx512_cvtph2ps, mayRaiseFPException is not set here.
9172multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9173                               X86FoldableSchedWrite sched> {
9174  let Uses = [MXCSR] in
9175  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9176                             (ins _src.RC:$src), "vcvtph2ps",
9177                             "{sae}, $src", "$src, {sae}",
9178                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9179                             T8PD, EVEX_B, Sched<[sched]>;
9180}
9181
// 512-bit vcvtph2ps (requires AVX512): v16i16 source, v16f32 result, memory
// form through f256mem with a plain load. Also receives the {sae} register
// form from avx512_cvtph2ps_sae.
9182let Predicates = [HasAVX512] in
9183  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9184                                    (load addr:$src), WriteCvtPH2PSZ>,
9185                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9186                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9187
// 256-bit and 128-bit vcvtph2ps (require VLX). Neither gets an {sae} form.
// The 256-bit form loads its v8i16 source through f128mem with a plain load.
// The 128-bit form uses f64mem and models the source as a bitconvert of a
// 64-bit zero-extending load (X86vzload64).
9188let Predicates = [HasVLX] in {
9189  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9190                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9191                       EVEX_CD8<32, CD8VH>;
9192  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9193                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
9194                       WriteCvtPH2PS>, EVEX, EVEX_V128,
9195                       EVEX_CD8<32, CD8VH>;
9196
9197  // Pattern match vcvtph2ps of a scalar i64 load.
9198  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9199              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9200            (VCVTPH2PSZ128rm addr:$src)>;
9201}
9202
// avx512_cvtps2ph - vcvtps2ph at opcode 0x1D with an 8-bit immediate ($src2).
//   _dest    - type info of the result vector.
//   _src     - type info of the source vector.
//   x86memop - memory operand class for the store forms.
//   RR / MR  - scheduling classes for the register and store forms.
// Defines:
//   rr   - unmasked register form, selected from X86any_cvtps2ph.
//   rrk  - merge-masked register form ($src0 tied to $dst), X86mcvtps2ph.
//   rrkz - zero-masked register form, X86mcvtps2ph with an all-zeros
//          passthrough.
//   mr   - unmasked store form; no selection pattern attached here.
//   mrk  - masked store form; no selection pattern, NotMemoryFoldable.
// All forms use GenericDomain, read MXCSR and may raise an FP exception.
9203multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9204                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9205let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9206  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9207             (ins _src.RC:$src1, i32u8imm:$src2),
9208             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9209             [(set _dest.RC:$dst,
9210                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9211             Sched<[RR]>;
9212  let Constraints = "$src0 = $dst" in
9213  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9214             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9215             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9216             [(set _dest.RC:$dst,
9217                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9218                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9219             Sched<[RR]>, EVEX_K;
9220  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9221             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9222             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9223             [(set _dest.RC:$dst,
9224                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9225                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9226             Sched<[RR]>, EVEX_KZ;
  // The store forms carry no patterns, so their memory behaviour is declared
  // explicitly.
9227  let hasSideEffects = 0, mayStore = 1 in {
9228    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9229               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9230               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9231               Sched<[MR]>;
    // NOTE(review): mrk takes its mask class from _dest.KRCWM, whereas rrk and
    // rrkz use _src.KRCWM. The two differ for the 128-bit instantiation
    // (v8i16x_info vs v4f32x_info) - confirm this is intended.
9232    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9233               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9234               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9235                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
9236  }
9237}
9238}
9239
// avx512_cvtps2ph_sae - register-only {sae} forms of vcvtps2ph at opcode 0x1D,
// all encoded with EVEX_B:
//   rrb   - unmasked, selected from X86cvtps2phSAE.
//   rrbk  - merge-masked ($src0 tied to $dst), selected from X86mcvtps2phSAE.
//   rrbkz - zero-masked, X86mcvtps2phSAE with an all-zeros passthrough.
// The forms are marked hasSideEffects = 0 and as reading MXCSR;
// mayRaiseFPException is not set here. Note that the scheduling template
// argument is itself named Sched and is used inside Sched<[...]>.
9240multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9241                               SchedWrite Sched> {
9242  let hasSideEffects = 0, Uses = [MXCSR] in {
9243    def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9244              (ins _src.RC:$src1, i32u8imm:$src2),
9245              "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9246              [(set _dest.RC:$dst,
9247                    (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9248              EVEX_B, Sched<[Sched]>;
9249    let Constraints = "$src0 = $dst" in
9250    def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9251              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9252              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9253              [(set _dest.RC:$dst,
9254                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9255                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
9256              EVEX_B, Sched<[Sched]>, EVEX_K;
9257    def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9258              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9259              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9260              [(set _dest.RC:$dst,
9261                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9262                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9263              EVEX_B, Sched<[Sched]>, EVEX_KZ;
9264}
9265}
9266
// 512-bit vcvtps2ph (requires AVX512): v16f32 source, v16i16 result, store
// form through f256mem, plus the {sae} register forms. Because the mr form
// has no pattern of its own, the Pat below selects it for a store of the
// unmasked v16i16 conversion result.
9267let Predicates = [HasAVX512] in {
9268  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9269                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9270                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9271                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9272
9273  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9274            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9275}
9276
// 256-bit and 128-bit vcvtps2ph (require VLX); neither gets {sae} forms.
// The 256-bit form stores through f128mem, the 128-bit form through f64mem.
// The Pats select the pattern-less mr forms:
//   - Z128mr for a store of element 0 of the v8i16 result, viewed either as
//     f64 (bc_v2f64) or as i64 (bc_v2i64);
//   - Z256mr for a store of the whole v8i16 result of a VR256X source.
9277let Predicates = [HasVLX] in {
9278  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9279                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9280                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9281  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9282                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9283                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9284
9285  def : Pat<(store (f64 (extractelt
9286                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9287                         (iPTR 0))), addr:$dst),
9288            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9289  def : Pat<(store (i64 (extractelt
9290                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9291                         (iPTR 0))), addr:$dst),
9292            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9293  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9294            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9295}
9296
9297//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Defines only the register-register {sae} form (rrb). It has no outputs and
// no selection pattern, is marked hasSideEffects = 0 and reads MXCSR. The
// EFLAGS definition is not set inside this multiclass; the instantiations in
// this file wrap it in `let Defs = [EFLAGS]`.
//   opc       - opcode byte.
//   _         - type info supplying the register class of both operands.
//   OpcodeStr - mnemonic.
//   d         - execution domain.
//   sched     - scheduling class (defaults to WriteFComX).
9298multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9299                              string OpcodeStr, Domain d,
9300                              X86FoldableSchedWrite sched = WriteFComX> {
9301  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9302  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9303                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9304                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9305}
9306
// {sae} register forms of the single/double scalar compares (require AVX512,
// define EFLAGS). The unordered compares (vucomis*) use opcode 0x2E and the
// ordered compares (vcomis*) use 0x2F; the double-precision variants add
// VEX_W and a 64-bit CD8 element size.
9307let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9308  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9309                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9310  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9311                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9312  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9313                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9314  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9315                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9316}
9317
// EVEX-encoded single/double scalar compares that define EFLAGS (require
// AVX512), built from the sse12_ord_cmp and sse12_ord_cmp_int multiclasses
// (both defined outside this file chunk).
//   - The FR32X/FR64X variants select from X86any_fcmp (opcode 0x2E) and
//     X86strict_fcmps (opcode 0x2F).
//   - The isCodeGenOnly VR128X variants select from X86ucomi (0x2E) and
//     X86comi (0x2F), with ssmem/sdmem operands.
// NOTE(review): the mnemonics are passed without a leading "v"; presumably
// the sse12_ord_cmp* multiclasses prepend it - confirm in their definitions.
9318let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9319  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9320                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9321                                 EVEX_CD8<32, CD8VT1>;
9322  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9323                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
9324                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9325  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9326                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9327                                 EVEX_CD8<32, CD8VT1>;
9328  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9329                                 "comisd", SSEPackedDouble>, PD, EVEX,
9330                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9331  let isCodeGenOnly = 1 in {
9332    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9333                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9334                          EVEX_CD8<32, CD8VT1>;
9335    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9336                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9337                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9338
9339    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9340                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9341                          EVEX_CD8<32, CD8VT1>;
9342    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9343                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9344                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9345  }
9346}
9347
// Half-precision scalar compares that define EFLAGS (require FP16). They
// mirror the single/double definitions: {sae} register forms, FR16X forms
// selected from X86any_fcmp (0x2E) / X86strict_fcmps (0x2F), and
// isCodeGenOnly VR128X forms selected from X86ucomi / X86comi. All of them
// use the T_MAP5PS opcode map and a 16-bit CD8 element size.
9348let Defs = [EFLAGS], Predicates = [HasFP16] in {
9349  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9350                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9351                                EVEX_CD8<16, CD8VT1>;
9352  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9353                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9354                                EVEX_CD8<16, CD8VT1>;
9355  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9356                                "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9357                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9358  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9359                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9360                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9361  let isCodeGenOnly = 1 in {
9362    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9363                                sse_load_f16, "ucomish", SSEPackedSingle>,
9364                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9365
9366    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9367                                sse_load_f16, "comish", SSEPackedSingle>,
9368                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9369  }
9370}
9371
9372/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
/// Two-source scalar forms built on AVX512_maskable_scalar:
///   rr - both sources in registers.
///   rm - second source from memory (_.IntScalarMemOp, matched through
///        _.ScalarIntMemFrags).
/// prd is the predicate required by both forms (default HasAVX512). MXCSR
/// usage is not declared here; callers add it where wanted.
9373multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9374                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9375                         Predicate prd = HasAVX512> {
9376  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9377  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9378                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9379                           "$src2, $src1", "$src1, $src2",
9380                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9381                           EVEX_4V, VEX_LIG, Sched<[sched]>;
9382  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9383                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9384                         "$src2, $src1", "$src1, $src2",
9385                         (OpNode (_.VT _.RC:$src1),
9386                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9387                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9388}
9389}
9390
// Scalar reciprocal (opcode 0x4D) and reciprocal-square-root (opcode 0x4F)
// approximations. The half-precision forms (vrcpsh / vrsqrtsh) require FP16,
// use the T_MAP6PD map, and sit outside the `let Uses = [MXCSR]` scope. The
// single/double 14-bit forms use T8PD and are declared as reading MXCSR.
9391defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9392                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9393                               T_MAP6PD;
9394defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9395                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9396                                 EVEX_CD8<16, CD8VT1>, T_MAP6PD;
9397let Uses = [MXCSR] in {
9398defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9399                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9400                               T8PD;
9401defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9402                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
9403                               T8PD;
9404defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9405                                 SchedWriteFRsqrt.Scl, f32x_info>,
9406                                 EVEX_CD8<32, CD8VT1>, T8PD;
9407defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9408                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
9409                                 EVEX_CD8<64, CD8VT1>, T8PD;
9410}
9411
9412/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd, rcpph, rsqrtph
/// One-source packed forms built on AVX512_maskable:
///   r  - register source.
///   m  - full-vector load source (_.LdFrag).
///   mb - broadcast-load source (_.BroadcastLdFrag), encoded with EVEX_B.
/// Every form is tagged T8PD here; the half-precision instantiations in this
/// file additionally append T_MAP6PD.
9413multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9414                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9415  let ExeDomain = _.ExeDomain in {
9416  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9417                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9418                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9419                         Sched<[sched]>;
9420  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9421                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9422                         (OpNode (_.VT
9423                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9424                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9425  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9426                          (ins _.ScalarMemOp:$src), OpcodeStr,
9427                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9428                          (OpNode (_.VT
9429                            (_.BroadcastLdFrag addr:$src)))>,
9430                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9431  }
9432}
9433
// avx512_fp14_p_vl_all - instantiates avx512_fp14_p for every vector length
// and element type of one operation:
//   14PSZ / 14PDZ             - 512-bit single/double, read MXCSR.
//   PHZ                       - 512-bit half, requires FP16.
//   14PS/14PD Z128 and Z256   - require VLX, read MXCSR.
//   PHZ128 / PHZ256           - require FP16 and VLX.
// The half-precision variants append "ph" to OpcodeStr (no "14"), use
// T_MAP6PD, and are not placed under `Uses = [MXCSR]`.
9434multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9435                                X86SchedWriteWidths sched> {
9436  let Uses = [MXCSR] in {
9437  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9438                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9439  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9440                             v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9441  }
9442  let Predicates = [HasFP16] in
9443  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9444                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9445
9446  // Define only if AVX512VL feature is present.
9447  let Predicates = [HasVLX], Uses = [MXCSR] in {
9448    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9449                                  OpNode, sched.XMM, v4f32x_info>,
9450                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9451    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9452                                  OpNode, sched.YMM, v8f32x_info>,
9453                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9454    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9455                                  OpNode, sched.XMM, v2f64x_info>,
9456                                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
9457    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9458                                  OpNode, sched.YMM, v4f64x_info>,
9459                                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
9460  }
9461  let Predicates = [HasFP16, HasVLX] in {
9462    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9463                                OpNode, sched.XMM, v8f16x_info>,
9464                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9465    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9466                                OpNode, sched.YMM, v16f16x_info>,
9467                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9468  }
9469}
9470
// Packed reciprocal-square-root (opcode 0x4E) and reciprocal (opcode 0x4C)
// approximations for all vector lengths; the record names are formed from the
// VRSQRT / VRCP prefix plus the suffixes defined in avx512_fp14_p_vl_all.
9471defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9472defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9473
9474/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Also instantiated in this file for the scalar getexp forms (through
/// avx512_eri_s and avx512_vgetexpsh).
/// Two-source scalar forms built on AVX512_maskable_scalar, all reading MXCSR:
///   r  - register sources, selected from OpNode, tagged SIMD_EXC.
///   rb - register sources with {sae}, selected from OpNodeSAE, EVEX_B;
///        not tagged SIMD_EXC.
///   m  - second source from memory (_.ScalarIntMemFrags), tagged SIMD_EXC.
9475multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9476                         SDNode OpNode, SDNode OpNodeSAE,
9477                         X86FoldableSchedWrite sched> {
9478  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9479  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9480                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9481                           "$src2, $src1", "$src1, $src2",
9482                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9483                           Sched<[sched]>, SIMD_EXC;
9484
9485  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9486                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9487                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9488                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9489                            EVEX_B, Sched<[sched]>;
9490
9491  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9492                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9493                         "$src2, $src1", "$src1, $src2",
9494                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9495                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9496  }
9497}
9498
// avx512_eri_s - instantiates avx512_fp28_s for the single ("ss" suffix,
// f32x_info) and double ("sd" suffix, f64x_info, VEX_W) scalar variants of one
// operation. No predicate is set here; callers supply it where needed.
9499multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9500                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9501  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9502                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9503  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9504                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
9505}
9506
// avx512_vgetexpsh - half-precision scalar variant ("sh" suffix, f16x_info) of
// avx512_fp28_s. Requires FP16 and uses the T_MAP6PD map; unlike the SSZ/SDZ
// variants in avx512_eri_s, VEX_LIG is not appended here.
9507multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9508                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9509  let Predicates = [HasFP16] in
9510  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9511               EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9512}
9513
// Scalar vrcp28 (opcode 0xCB) and vrsqrt28 (opcode 0xCD), single and double
// variants; both require the ERI feature.
9514let Predicates = [HasERI] in {
9515  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9516                               SchedWriteFRcp.Scl>;
9517  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9518                               SchedWriteFRsqrt.Scl>;
9519}
9520
// Scalar vgetexp (opcode 0x43): single/double variants via avx512_eri_s and
// the half-precision variant via avx512_vgetexpsh. This defm is not wrapped
// in the HasERI predicate used for VRCP28/VRSQRT28.
9521defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9522                              SchedWriteFRnd.Scl>,
9523                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9524                                  SchedWriteFRnd.Scl>;
9525/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// One-source packed forms built on AVX512_maskable. All read MXCSR and may
/// raise an FP exception:
///   r  - register source.
///   m  - full-vector load source (_.LdFrag).
///   mb - broadcast-load source (_.BroadcastLdFrag), encoded with EVEX_B.
9526
9527multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9528                         SDNode OpNode, X86FoldableSchedWrite sched> {
9529  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9530  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9531                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9532                         (OpNode (_.VT _.RC:$src))>,
9533                         Sched<[sched]>;
9534
9535  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9536                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9537                         (OpNode (_.VT
9538                             (bitconvert (_.LdFrag addr:$src))))>,
9539                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9540
9541  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9542                         (ins _.ScalarMemOp:$src), OpcodeStr,
9543                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9544                         (OpNode (_.VT
9545                                  (_.BroadcastLdFrag addr:$src)))>,
9546                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9547  }
9548}
9548}
// Register-only "{sae}" form (rb) of a packed unary FP operation. The asm
// strings place "{sae}" next to $src and the record is tagged EVEX_B. It is
// defined with Uses = [MXCSR]; unlike avx512_fp28_p, mayRaiseFPException is
// not set here.
9549multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9550                         SDNode OpNode, X86FoldableSchedWrite sched> {
9551  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9552  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9553                        (ins _.RC:$src), OpcodeStr,
9554                        "{sae}, $src", "$src, {sae}",
9555                        (OpNode (_.VT _.RC:$src))>,
9556                        EVEX_B, Sched<[sched]>;
9557}
9558
// 512-bit packed ps/pd instantiations. Each of PSZ (v16f32_info) and PDZ
// (v8f64_info, with VEX_W) combines avx512_fp28_p using OpNode with
// avx512_fp28_p_sae using OpNodeSAE, both scheduled on sched.ZMM.
9559multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9560                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9561   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9562              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9563              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9564   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9565              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9566              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9567}
9568
// 128-bit and 256-bit packed ps/pd instantiations of avx512_fp28_p, all under
// the HasVLX predicate. Only avx512_fp28_p is used, so no "{sae}" (rb) variant
// is produced for these widths. The pd forms add VEX_W.
9569multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9570                                  SDNode OpNode, X86SchedWriteWidths sched> {
9571  // Define only if AVX512VL feature is present.
9572  let Predicates = [HasVLX] in {
9573    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9574                                sched.XMM>,
9575                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9576    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9577                                sched.YMM>,
9578                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9579    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9580                                sched.XMM>,
9581                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9582    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9583                                sched.YMM>,
9584                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9585  }
9586}
9587
// Packed half-precision ("ph") instantiations, encoded with T_MAP6PD and
// EVEX_CD8<16, CD8VF>:
//   PHZ            - v32f16_info, HasFP16; includes the "{sae}" form via
//                    avx512_fp28_p_sae
//   PHZ128/PHZ256  - v8f16x_info / v16f16x_info, HasFP16 + HasVLX; no "{sae}"
//                    form
9588multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9589                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9590  let Predicates = [HasFP16] in
9591  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9592              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9593              T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9594  let Predicates = [HasFP16, HasVLX] in {
9595    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9596                                     EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9597    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9598                                     EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9599  }
9600}
// Packed (512-bit only, via avx512_eri) vrsqrt28 (0xCC), vrcp28 (0xCA) and
// vexp2 (0xC8), all guarded by HasERI and tagged EVEX.
9601let Predicates = [HasERI] in {
9602 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9603                            SchedWriteFRsqrt>, EVEX;
9604 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9605                            SchedWriteFRcp>, EVEX;
9606 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9607                            SchedWriteFAdd>, EVEX;
9608}
// Packed vgetexp at opcode 0x42, assembled from three multiclasses:
//   avx512_eri               - 512-bit ps/pd (with "{sae}" forms)
//   avx512_vgetexp_fp16      - ph forms at all three widths
//   avx512_fp_unaryop_packed - 128/256-bit ps/pd under HasVLX
// This defm sits outside the HasERI block.
9609defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9610                            SchedWriteFRnd>,
9611                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9612                                     SchedWriteFRnd>,
9613                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9614                                          SchedWriteFRnd>, EVEX;
9615
// Packed square root with an explicit rounding-control operand. Defines only
// the register form `rb`, which takes an extra AVX512RC:$rc input, matches
// X86fsqrtRnd with $rc as an i32 target immediate, and is tagged
// EVEX_B + EVEX_RC.
9616multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9617                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9618  let ExeDomain = _.ExeDomain in
9619  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9620                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9621                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9622                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9623}
9624
// Packed square root in register (r), full-vector load (m) and broadcast load
// (mb) forms. Each uses AVX512_maskable_split and supplies two DAGs: the first
// written with any_fsqrt and the second with plain fsqrt. All three are
// defined with Uses = [MXCSR] and mayRaiseFPException = 1; mb additionally
// carries EVEX_B.
9625multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9626                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9627  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9628  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9629                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9630                         (_.VT (any_fsqrt _.RC:$src)),
9631                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9632                         Sched<[sched]>;
9633  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9634                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9635                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9636                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9637                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9638  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9639                          (ins _.ScalarMemOp:$src), OpcodeStr,
9640                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9641                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9642                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9643                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9644  }
9645}
9646
// Instantiates avx512_sqrt_packed for every packed element type and width:
//   ph - PHZ under HasFP16; PHZ128/PHZ256 under HasFP16 + HasVLX (T_MAP5PS)
//   ps - PSZ with no predicate set here; PSZ128/PSZ256 under HasVLX (PS)
//   pd - PDZ with no predicate set here; PDZ128/PDZ256 under HasVLX
//        (PD, VEX_W)
// The scheduling class is picked per element type and width from `sched`
// (sched.PH/.PS/.PD, then .XMM/.YMM/.ZMM). The whole multiclass is wrapped in
// an outer `let Uses = [MXCSR], mayRaiseFPException = 1`.
9647let Uses = [MXCSR], mayRaiseFPException = 1 in
9648multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9649                                  X86SchedWriteSizes sched> {
9650  let Predicates = [HasFP16] in
9651  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9652                                sched.PH.ZMM, v32f16_info>,
9653                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9654  let Predicates = [HasFP16, HasVLX] in {
9655    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9656                                     sched.PH.XMM, v8f16x_info>,
9657                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9658    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9659                                     sched.PH.YMM, v16f16x_info>,
9660                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9661  }
9662  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9663                                sched.PS.ZMM, v16f32_info>,
9664                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9665  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9666                                sched.PD.ZMM, v8f64_info>,
9667                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9668  // Define only if AVX512VL feature is present.
9669  let Predicates = [HasVLX] in {
9670    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9671                                     sched.PS.XMM, v4f32x_info>,
9672                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9673    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9674                                     sched.PS.YMM, v8f32x_info>,
9675                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9676    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9677                                     sched.PD.XMM, v2f64x_info>,
9678                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9679    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9680                                     sched.PD.YMM, v4f64x_info>,
9681                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9682  }
9683}
9684
// Rounding-control (rb) packed square root, 512-bit only: PHZ (HasFP16), PSZ
// and PDZ via avx512_sqrt_packed_round. No 128/256-bit variants are defined.
// The multiclass is wrapped in an outer `let Uses = [MXCSR]`.
9685let Uses = [MXCSR] in
9686multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9687                                        X86SchedWriteSizes sched> {
9688  let Predicates = [HasFP16] in
9689  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9690                                      sched.PH.ZMM, v32f16_info>,
9691                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9692  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9693                                      sched.PS.ZMM, v16f32_info>,
9694                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9695  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9696                                      sched.PD.ZMM, v8f64_info>,
9697                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9698}
9699
// Scalar square root for element info `_`, guarded by predicate `prd`
// (default HasAVX512). Defines:
//   r_Int / m_Int - vector-register ("intrinsic") forms matching X86fsqrts,
//                   with register or scalar-memory second source
//   rb_Int        - rounding-control form matching X86fsqrtRnds
//                   (AVX512RC:$rc, EVEX_B + EVEX_RC, Uses = [MXCSR])
//   r / m         - codegen-only forms on _.FRC with empty pattern lists
// plus two selection patterns that map any_fsqrt on a scalar value to the
// codegen-only forms. `Name` is the record-name prefix used to look those
// instructions up (Name#Zr / Name#Zm).
9700multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9701                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9702  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9703    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9704                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9705                         "$src2, $src1", "$src1, $src2",
9706                         (X86fsqrts (_.VT _.RC:$src1),
9707                                    (_.VT _.RC:$src2))>,
9708                         Sched<[sched]>, SIMD_EXC;
9709    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9710                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9711                         "$src2, $src1", "$src1, $src2",
9712                         (X86fsqrts (_.VT _.RC:$src1),
9713                                    (_.ScalarIntMemFrags addr:$src2))>,
9714                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9715    let Uses = [MXCSR] in
9716    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9717                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9718                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9719                         (X86fsqrtRnds (_.VT _.RC:$src1),
9720                                     (_.VT _.RC:$src2),
9721                                     (i32 timm:$rc))>,
9722                         EVEX_B, EVEX_RC, Sched<[sched]>;
9723
// Codegen-only FRC forms: no patterns of their own; they are selected through
// the explicit Pat records below.
9724    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9725      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9726                (ins _.FRC:$src1, _.FRC:$src2),
9727                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9728                Sched<[sched]>, SIMD_EXC;
9729      let mayLoad = 1 in
9730        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9731                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9732                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9733                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9734    }
9735  }
9736
// Register source: the first instruction operand is an IMPLICIT_DEF and the
// value being square-rooted is passed as the second operand.
9737  let Predicates = [prd] in {
9738    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9739              (!cast<Instruction>(Name#Zr)
9740                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9741  }
9742
// Loaded source: the load is folded into the memory form only when the
// OptForSize predicate also holds.
9743  let Predicates = [prd, OptForSize] in {
9744    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9745              (!cast<Instruction>(Name#Zm)
9746                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9747  }
9748}
9749
// Instantiates avx512_sqrt_scalar for the three scalar element types:
//   SHZ - "sh", f16x_info, predicate HasFP16, T_MAP5XS
//   SSZ - "ss", f32x_info, default predicate, XS
//   SDZ - "sd", f64x_info, default predicate, XD + VEX_W
// The Name argument is built from NAME plus "SH"/"SS"/"SD" so the patterns
// inside avx512_sqrt_scalar can resolve the generated instruction records.
9750multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9751                                  X86SchedWriteSizes sched> {
9752  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9753                        EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9754  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9755                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9756  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9757                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9758}
9759
// vsqrt at opcode 0x51. The first defm produces the packed forms (plain and
// rounding-control); the second produces the scalar forms and adds VEX_LIG.
9760defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9761             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9762
9763defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9764
// Scalar round-to-scale with an 8-bit immediate ($src3). Defines:
//   r_Int  - register form matching X86RndScales
//   rb_Int - "{sae}" register form matching X86RndScalesSAE (EVEX_B,
//            Uses = [MXCSR])
//   m_Int  - scalar-memory form matching X86RndScales
//   r / m  - codegen-only forms on _.FRC with empty pattern lists
// followed by two Pat records that select X86any_VRndScale into the
// codegen-only forms.
// NOTE(review): the codegen-only defs and both Pat records name HasAVX512
// explicitly, while the VRNDSCALESHZ instantiation of this multiclass is
// wrapped in `let Predicates = [HasFP16]` - confirm which predicate is
// intended to apply to the f16 r/m records and patterns.
9765multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9766                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9767  let ExeDomain = _.ExeDomain in {
9768  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9769                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9770                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9771                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9772                           (i32 timm:$src3)))>,
9773                           Sched<[sched]>, SIMD_EXC;
9774
9775  let Uses = [MXCSR] in
9776  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9777                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9778                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9779                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9780                         (i32 timm:$src3)))>, EVEX_B,
9781                         Sched<[sched]>;
9782
9783  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9784                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9785                         OpcodeStr,
9786                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9787                         (_.VT (X86RndScales _.RC:$src1,
9788                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9789                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9790
9791  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9792    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9793               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9794               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9795               []>, Sched<[sched]>, SIMD_EXC;
9796
9797    let mayLoad = 1 in
9798      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9799                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9800                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9801                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9802  }
9803  }
9804
// Register source: the first instruction operand is an IMPLICIT_DEF; the
// rounded value and the immediate follow.
9805  let Predicates = [HasAVX512] in {
9806    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9807              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9808               _.FRC:$src1, timm:$src2))>;
9809  }
9810
// Loaded source: folded into the memory form only when OptForSize also holds.
9811  let Predicates = [HasAVX512, OptForSize] in {
9812    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9813              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9814               addr:$src1, timm:$src2))>;
9815  }
9816}
9817
// Scalar vrndscale instantiations of avx512_rndscale_scalar:
//   VRNDSCALESHZ - opcode 0x0A, f16x_info, under HasFP16,
//                  AVX512PSIi8Base + TA; no VEX_LIG is attached
//   VRNDSCALESSZ - opcode 0x0A, f32x_info, AVX512AIi8Base, VEX_LIG
//   VRNDSCALESDZ - opcode 0x0B, f64x_info, AVX512AIi8Base, VEX_LIG, VEX_W
9818let Predicates = [HasFP16] in
9819defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9820                                           SchedWriteFRnd.Scl, f16x_info>,
9821                                           AVX512PSIi8Base, TA, EVEX_4V,
9822                                           EVEX_CD8<16, CD8VT1>;
9823
9824defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9825                                           SchedWriteFRnd.Scl, f32x_info>,
9826                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
9827                                           EVEX_CD8<32, CD8VT1>;
9828
9829defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9830                                           SchedWriteFRnd.Scl, f64x_info>,
9831                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9832                                           EVEX_CD8<64, CD8VT1>;
9833
// Selection patterns that fold a masked scalar operation, expressed as
//   Move(src1, scalar_to_vector(X86selects_mask(Mask, OpNode(elt0(src2)), X)))
// into the masked intrinsic instruction "V" # OpcPrefix # r_Intk / r_Intkz:
//   - X = element 0 of $dst -> merge-masking form (r_Intk), $dst is passed
//                              as the first instruction operand
//   - X = ZeroFP            -> zero-masking form (r_Intkz)
// `Mask` is the DAG matched for the select condition and `OutMask` is the DAG
// emitted as the instruction's mask operand. Both patterns are guarded by
// BasePredicate.
9834multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9835                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9836                                dag OutMask, Predicate BasePredicate> {
9837  let Predicates = [BasePredicate] in {
9838    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9839               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9840               (extractelt _.VT:$dst, (iPTR 0))))),
9841              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9842               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9843
9844    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9845               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9846               ZeroFP))),
9847              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9848               OutMask, _.VT:$src2, _.VT:$src1)>;
9849  }
9850}
9851
// Masked scalar fsqrt patterns for sh (HasFP16), ss and sd (HasAVX512). In all
// three the matched mask is a GR32 value truncated to i8 and wrapped as v1i1,
// and the emitted mask operand copies $mask into the VK1WM register class.
9852defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9853                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9854                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9855defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9856                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9857                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9858defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9859                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9860                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9861
9862
9863//-------------------------------------------------
9864// Integer truncate and extend operations
9865//-------------------------------------------------
9866
9867// PatFrags that contain a select and a truncate op. They take operands in the
9868// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9869// either kind of node to the multiclasses.
// Each fragment takes (src, src0, mask) and expands to
//   vselect_mask(mask, <truncate>(src), src0)
// where <truncate> is trunc, X86vtruncs or X86vtruncus respectively.
9870def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9871                           (vselect_mask node:$mask,
9872                                         (trunc node:$src), node:$src0)>;
9873def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9874                            (vselect_mask node:$mask,
9875                                          (X86vtruncs node:$src), node:$src0)>;
9876def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9877                             (vselect_mask node:$mask,
9878                                           (X86vtruncus node:$src), node:$src0)>;
9879
// Core truncate definitions for one source/destination type pair
// (SrcInfo -> DestInfo). Register-destination forms carry patterns:
//   rr   - unmasked, matches OpNode
//   rrk  - merge-masked, matches MaskNode(src, src0, mask); $src0 is tied
//          to $dst
//   rrkz - zero-masked, matches MaskNode with DestInfo.ImmAllZerosV as the
//          pass-through value
// Memory-destination forms (mr, mrk) are declared with mayStore = 1 and empty
// pattern lists; mrk is additionally marked NotMemoryFoldable. The mask
// register class is taken from SrcInfo (SrcInfo.KRCWM).
9880multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9881                              SDPatternOperator MaskNode,
9882                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9883                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9884  let ExeDomain = DestInfo.ExeDomain in {
9885  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9886             (ins SrcInfo.RC:$src),
9887             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9888             [(set DestInfo.RC:$dst,
9889                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9890             EVEX, Sched<[sched]>;
9891  let Constraints = "$src0 = $dst" in
9892  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9893             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9894             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9895             [(set DestInfo.RC:$dst,
9896                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9897                             (DestInfo.VT DestInfo.RC:$src0),
9898                             SrcInfo.KRCWM:$mask))]>,
9899             EVEX, EVEX_K, Sched<[sched]>;
9900  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9901             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9902             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9903             [(set DestInfo.RC:$dst,
9904                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9905                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9906             EVEX, EVEX_KZ, Sched<[sched]>;
9907  }
9908
9909  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9910    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9911               (ins x86memop:$dst, SrcInfo.RC:$src),
9912               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9913               EVEX, Sched<[sched.Folded]>;
9914
9915    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9916               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9917               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9918               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9919  }//mayStore = 1, hasSideEffects = 0
9920}
9921
// Selection patterns for the store forms of a truncate instruction:
//   truncFrag(src, addr)        -> <Name><ZSuffix>mr
//   mtruncFrag(src, addr, mask) -> <Name><ZSuffix>mrk
// The instruction record is looked up by name, combining `Name` with
// SrcInfo.ZSuffix.
9922multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9923                                    PatFrag truncFrag, PatFrag mtruncFrag,
9924                                    string Name> {
9925
9926  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9927            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9928                                    addr:$dst, SrcInfo.RC:$src)>;
9929
9930  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9931                        SrcInfo.KRCWM:$mask),
9932            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9933                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9934}
9935
// Builds the three vector-length variants of one truncate instruction. Each
// variant pairs avx512_trunc_common (instruction records) with
// avx512_trunc_mr_lowering (store patterns):
//   Z128 / Z256 - source VTSrcInfo.info128 / .info256, under [HasVLX, prd]
//   Z           - source VTSrcInfo.info512, under [prd]
// The unmasked node, masked node, destination type info and memory operand
// are supplied separately per width; `prd` defaults to HasAVX512.
9936multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9937                        SDNode OpNode256, SDNode OpNode512,
9938                        SDPatternOperator MaskNode128,
9939                        SDPatternOperator MaskNode256,
9940                        SDPatternOperator MaskNode512,
9941                        X86SchedWriteWidths sched,
9942                        AVX512VLVectorVTInfo VTSrcInfo,
9943                        X86VectorVTInfo DestInfoZ128,
9944                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9945                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9946                        X86MemOperand x86memopZ, PatFrag truncFrag,
9947                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9948
9949  let Predicates = [HasVLX, prd] in {
9950    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9951                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9952                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9953                                         mtruncFrag, NAME>, EVEX_V128;
9954
9955    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9956                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9957                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9958                                         mtruncFrag, NAME>, EVEX_V256;
9959  }
9960  let Predicates = [prd] in
9961    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9962                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9963                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9964                                         mtruncFrag, NAME>, EVEX_V512;
9965}
9966
// i64 -> i8 truncate (source avx512vl_i64_info). All three widths use
// InVecNode / InVecMaskNode and a v16i8x_info destination; the store operands
// are i16mem / i32mem / i64mem for the 128 / 256 / 512-bit sources.
9967multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9968                           X86SchedWriteWidths sched, PatFrag StoreNode,
9969                           PatFrag MaskedStoreNode, SDNode InVecNode,
9970                           SDPatternOperator InVecMaskNode> {
9971  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9972                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9973                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9974                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9975                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9976}
9977
// i64 -> i16 truncate (source avx512vl_i64_info). The 128- and 256-bit
// variants use InVecNode / InVecMaskNode; the 512-bit variant uses OpNode /
// MaskNode. The destination is v8i16x_info at every width; the store operands
// are i32mem / i64mem / i128mem.
9978multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9979                           SDPatternOperator MaskNode,
9980                           X86SchedWriteWidths sched, PatFrag StoreNode,
9981                           PatFrag MaskedStoreNode, SDNode InVecNode,
9982                           SDPatternOperator InVecMaskNode> {
9983  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9984                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9985                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9986                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9987                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9988}
9989
// i64 -> i32 truncate (source avx512vl_i64_info). Only the 128-bit variant
// uses InVecNode / InVecMaskNode; the 256- and 512-bit variants use OpNode /
// MaskNode. Destinations are v4i32x_info, v4i32x_info and v8i32x_info; the
// store operands are i64mem / i128mem / i256mem.
9990multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9991                           SDPatternOperator MaskNode,
9992                           X86SchedWriteWidths sched, PatFrag StoreNode,
9993                           PatFrag MaskedStoreNode, SDNode InVecNode,
9994                           SDPatternOperator InVecMaskNode> {
9995  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9996                          InVecMaskNode, MaskNode, MaskNode, sched,
9997                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9998                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9999                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
10000}
10001
// i32 -> i8 truncate (source avx512vl_i32_info). The 128- and 256-bit
// variants use InVecNode / InVecMaskNode; the 512-bit variant uses OpNode /
// MaskNode. The destination is v16i8x_info at every width; the store operands
// are i32mem / i64mem / i128mem.
10002multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
10003                           SDPatternOperator MaskNode,
10004                           X86SchedWriteWidths sched, PatFrag StoreNode,
10005                           PatFrag MaskedStoreNode, SDNode InVecNode,
10006                           SDPatternOperator InVecMaskNode> {
10007  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
10008                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
10009                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
10010                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
10011                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
10012}
10013
// i32 -> i16 truncate (source avx512vl_i32_info). Only the 128-bit variant
// uses InVecNode / InVecMaskNode; the 256- and 512-bit variants use OpNode /
// MaskNode. Destinations are v8i16x_info, v8i16x_info and v16i16x_info; the
// store operands are i64mem / i128mem / i256mem.
10014multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10015                           SDPatternOperator MaskNode,
10016                           X86SchedWriteWidths sched, PatFrag StoreNode,
10017                           PatFrag MaskedStoreNode, SDNode InVecNode,
10018                           SDPatternOperator InVecMaskNode> {
10019  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10020                          InVecMaskNode, MaskNode, MaskNode, sched,
10021                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
10022                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
10023                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
10024}
10025
// i16 -> i8 truncate (source avx512vl_i16_info). Only the 128-bit variant
// uses InVecNode / InVecMaskNode; the 256- and 512-bit variants use OpNode /
// MaskNode. Destinations are v16i8x_info, v16i8x_info and v32i8x_info; the
// store operands are i64mem / i128mem / i256mem. This is the only truncate
// wrapper here that overrides the base predicate, passing HasBWI.
10026multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10027                           SDPatternOperator MaskNode,
10028                           X86SchedWriteWidths sched, PatFrag StoreNode,
10029                           PatFrag MaskedStoreNode, SDNode InVecNode,
10030                           SDPatternOperator InVecMaskNode> {
10031  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10032                          InVecMaskNode, MaskNode, MaskNode, sched,
10033                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
10034                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
10035                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
10036}
10037
10038defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
10039                                  SchedWriteVecTruncate, truncstorevi8,
10040                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10041defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
10042                                  SchedWriteVecTruncate, truncstore_s_vi8,
10043                                  masked_truncstore_s_vi8, X86vtruncs,
10044                                  X86vmtruncs>;
10045defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
10046                                  SchedWriteVecTruncate, truncstore_us_vi8,
10047                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
10048
10049defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
10050                                  SchedWriteVecTruncate, truncstorevi16,
10051                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10052defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
10053                                  SchedWriteVecTruncate, truncstore_s_vi16,
10054                                  masked_truncstore_s_vi16, X86vtruncs,
10055                                  X86vmtruncs>;
10056defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
10057                                  select_truncus, SchedWriteVecTruncate,
10058                                  truncstore_us_vi16, masked_truncstore_us_vi16,
10059                                  X86vtruncus, X86vmtruncus>;
10060
10061defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
10062                                  SchedWriteVecTruncate, truncstorevi32,
10063                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
10064defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
10065                                  SchedWriteVecTruncate, truncstore_s_vi32,
10066                                  masked_truncstore_s_vi32, X86vtruncs,
10067                                  X86vmtruncs>;
10068defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
10069                                  select_truncus, SchedWriteVecTruncate,
10070                                  truncstore_us_vi32, masked_truncstore_us_vi32,
10071                                  X86vtruncus, X86vmtruncus>;
10072
// Doubleword -> byte truncating moves (vpmovdb 0x31, vpmovsdb 0x21,
// vpmovusdb 0x11), built from avx512_trunc_db with the *vi8 truncating-store
// fragments and the trunc / X86vtruncs / X86vtruncus node flavours.
10073defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
10074                                  SchedWriteVecTruncate, truncstorevi8,
10075                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10076defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
10077                                  SchedWriteVecTruncate, truncstore_s_vi8,
10078                                  masked_truncstore_s_vi8, X86vtruncs,
10079                                  X86vmtruncs>;
10080defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
10081                                  select_truncus, SchedWriteVecTruncate,
10082                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10083                                  X86vtruncus, X86vmtruncus>;
10084
// Doubleword -> word truncating moves (vpmovdw 0x33, vpmovsdw 0x23,
// vpmovusdw 0x13), built from avx512_trunc_dw with the *vi16 truncating-store
// fragments and the trunc / X86vtruncs / X86vtruncus node flavours.
10085defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
10086                                  SchedWriteVecTruncate, truncstorevi16,
10087                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10088defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
10089                                  SchedWriteVecTruncate, truncstore_s_vi16,
10090                                  masked_truncstore_s_vi16, X86vtruncs,
10091                                  X86vmtruncs>;
10092defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10093                                  select_truncus, SchedWriteVecTruncate,
10094                                  truncstore_us_vi16, masked_truncstore_us_vi16,
10095                                  X86vtruncus, X86vmtruncus>;
10096
// Word -> byte truncating moves (vpmovwb 0x30, vpmovswb 0x20, vpmovuswb 0x10),
// built from avx512_trunc_wb with the *vi8 truncating-store fragments and the
// trunc / X86vtruncs / X86vtruncus node flavours.
10097defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10098                                  SchedWriteVecTruncate, truncstorevi8,
10099                                  masked_truncstorevi8, X86vtrunc,
10100                                  X86vmtrunc>;
10101defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10102                                  SchedWriteVecTruncate, truncstore_s_vi8,
10103                                  masked_truncstore_s_vi8, X86vtruncs,
10104                                  X86vmtruncs>;
10105defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10106                                  select_truncus, SchedWriteVecTruncate,
10107                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10108                                  X86vtruncus, X86vmtruncus>;
10109
// With AVX512 but without VLX, a truncate of a 256-bit source is selected to
// the 512-bit instruction: the YMM source is inserted into an otherwise
// undefined ZMM value (INSERT_SUBREG over IMPLICIT_DEF at sub_ymm), the
// 512-bit truncate runs on it, and only the low XMM part of the result
// (sub_xmm) is extracted.
10110let Predicates = [HasAVX512, NoVLX] in {
// v8i32 -> v8i16 via VPMOVDWZrr.
10111def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10112         (v8i16 (EXTRACT_SUBREG
10113                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10114                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
// v4i64 -> v4i32 via VPMOVQDZrr.
10115def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10116         (v4i32 (EXTRACT_SUBREG
10117                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10118                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
10119}
10120
// Same widening trick for v16i16 -> v16i8 when BWI is available without VLX:
// the YMM source is placed in an undefined ZMM value, VPMOVWBZrr truncates it,
// and the low XMM part of the result is extracted.
10121let Predicates = [HasBWI, NoVLX] in {
10122def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10123         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10124                                            VR256X:$src, sub_ymm))), sub_xmm))>;
10125}
10126
10127// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
//
// Emits two selection patterns for a masked truncate node.
//   InstrName - instruction name prefix; "rrk" or "rrkz" is appended to pick
//               the register form that takes a pass-through value or the one
//               that takes only mask and source.
//   OpNode    - masked truncate node with operands (source, pass-through, mask).
//   DestInfo  - type info of the truncated result (and of the pass-through).
//   SrcInfo   - type info of the source; the mask operand uses SrcInfo's
//               write-mask register class (KRCWM).
10128multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10129                           X86VectorVTInfo DestInfo,
10130                           X86VectorVTInfo SrcInfo> {
  // Pass-through is a register: select the "rrk" form with $src0 as first
  // operand.
10131  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10132                                 DestInfo.RC:$src0,
10133                                 SrcInfo.KRCWM:$mask)),
10134            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10135                                                 SrcInfo.KRCWM:$mask,
10136                                                 SrcInfo.RC:$src)>;
10137
  // Pass-through is the all-zeros vector: select the "rrkz" form, which has no
  // pass-through operand.
10138  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10139                                 DestInfo.ImmAllZerosV,
10140                                 SrcInfo.KRCWM:$mask)),
10141            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10142                                                  SrcInfo.RC:$src)>;
10143}
10144
// 256-bit masked truncates v8i32 -> v8i16 for the three vmtrunc node flavours,
// mapped onto the Z256 dword->word instructions; requires VLX.
10145let Predicates = [HasVLX] in {
10146defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10147defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10148defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10149}
10150
// 512-bit-source masked truncates for the three vmtrunc node flavours:
// v16i32 -> v16i16 (VPMOV*DWZ), v16i32 -> v16i8 (VPMOV*DBZ) and
// v8i64 -> v8i16 (VPMOV*QWZ).
10151let Predicates = [HasAVX512] in {
10152defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10153defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10154defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10155
10156defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10157defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10158defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10159
10160defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10161defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10162defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10163}
10164
// Common register (rr) and memory (rm) forms of a vector extend instruction.
//   opc / OpcodeStr   - opcode byte and mnemonic.
//   sched             - scheduling class; the rm form uses sched.Folded.
//   DestInfo/SrcInfo  - type info of the extended result and of the source.
//   x86memop          - memory operand used by the rm form.
//   LdFrag            - load fragment matched by the rm form.
//   OpNode            - extend node matched by the rr form.
// Both forms are produced through AVX512_maskable and use DestInfo's
// execution domain.
10165multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10166              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10167              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10168  let ExeDomain = DestInfo.ExeDomain in {
10169  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10170                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10171                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10172                  EVEX, Sched<[sched]>;
10173
10174  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10175                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10176                  (DestInfo.VT (LdFrag addr:$src))>,
10177                EVEX, Sched<[sched.Folded]>;
10178  }
10179}
10180
// Byte -> word extends at 128/256/512 bits.
//   OpNode    - extend node used where source and result have the same
//               element count (Z256: v16i8 -> v16i16, Z: v32i8 -> v32i16).
//   InVecNode - in-vector extend node used for Z128, whose source type
//               (v16i8) has more elements than the result (v8i16).
//   ExtTy     - prefix used to form the default load fragment name
//               ExtTy # "extloadvi8".
// Z128/Z256 require VLX and BWI; Z requires BWI. The memory operand is half
// the destination width (i64mem / i128mem / i256mem).
10181multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
10182          SDNode OpNode, SDNode InVecNode, string ExtTy,
10183          X86SchedWriteWidths sched,
10184          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10185  let Predicates = [HasVLX, HasBWI] in {
10186    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
10187                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10188                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10189
10190    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
10191                    v16i8x_info, i128mem, LdFrag, OpNode>,
10192                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10193  }
10194  let Predicates = [HasBWI] in {
10195    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
10196                    v32i8x_info, i256mem, LdFrag, OpNode>,
10197                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10198  }
10199}
10200
// Byte -> doubleword extends at 128/256/512 bits.
// The source type is always v16i8. Z128 (-> v4i32) and Z256 (-> v8i32) use
// InVecNode because the result has fewer elements than the source; Z
// (-> v16i32) uses OpNode. The memory operand is a quarter of the destination
// width (i32mem / i64mem / i128mem). The default load fragment is
// ExtTy # "extloadvi8".
10201multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
10202          SDNode OpNode, SDNode InVecNode, string ExtTy,
10203          X86SchedWriteWidths sched,
10204          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10205  let Predicates = [HasVLX, HasAVX512] in {
10206    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10207                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10208                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10209
10210    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10211                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10212                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10213  }
10214  let Predicates = [HasAVX512] in {
10215    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10216                   v16i8x_info, i128mem, LdFrag, OpNode>,
10217                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10218  }
10219}
10220
// Byte -> quadword extends at 128/256/512 bits.
// Unlike the other avx512_pmovx_* multiclasses this one takes no OpNode:
// the source type is v16i8 for every width and the result never has as many
// elements (v2i64 / v4i64 / v8i64), so InVecNode is used throughout. The
// memory operand is an eighth of the destination width
// (i16mem / i32mem / i64mem).
10221multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
10222                              SDNode InVecNode, string ExtTy,
10223                              X86SchedWriteWidths sched,
10224                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10225  let Predicates = [HasVLX, HasAVX512] in {
10226    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10227                   v16i8x_info, i16mem, LdFrag, InVecNode>,
10228                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
10229
10230    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10231                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10232                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
10233  }
10234  let Predicates = [HasAVX512] in {
10235    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10236                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10237                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
10238  }
10239}
10240
// Word -> doubleword extends at 128/256/512 bits.
// Z128 (v8i16 -> v4i32) uses InVecNode; Z256 (v8i16 -> v8i32) and
// Z (v16i16 -> v16i32) use OpNode. The memory operand is half the destination
// width (i64mem / i128mem / i256mem). The default load fragment is
// ExtTy # "extloadvi16".
10241multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
10242         SDNode OpNode, SDNode InVecNode, string ExtTy,
10243         X86SchedWriteWidths sched,
10244         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10245  let Predicates = [HasVLX, HasAVX512] in {
10246    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10247                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10248                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10249
10250    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10251                   v8i16x_info, i128mem, LdFrag, OpNode>,
10252                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10253  }
10254  let Predicates = [HasAVX512] in {
10255    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10256                   v16i16x_info, i256mem, LdFrag, OpNode>,
10257                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10258  }
10259}
10260
// Word -> quadword extends at 128/256/512 bits.
// The source type is always v8i16. Z128 (-> v2i64) and Z256 (-> v4i64) use
// InVecNode; Z (-> v8i64) uses OpNode. The memory operand is a quarter of the
// destination width (i32mem / i64mem / i128mem). The default load fragment is
// ExtTy # "extloadvi16".
10261multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
10262         SDNode OpNode, SDNode InVecNode, string ExtTy,
10263         X86SchedWriteWidths sched,
10264         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10265  let Predicates = [HasVLX, HasAVX512] in {
10266    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10267                   v8i16x_info, i32mem, LdFrag, InVecNode>,
10268                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10269
10270    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10271                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10272                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10273  }
10274  let Predicates = [HasAVX512] in {
10275    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10276                   v8i16x_info, i128mem, LdFrag, OpNode>,
10277                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10278  }
10279}
10280
// Doubleword -> quadword extends at 128/256/512 bits.
// Z128 (v4i32 -> v2i64) uses InVecNode; Z256 (v4i32 -> v4i64) and
// Z (v8i32 -> v8i64) use OpNode. The memory operand is half the destination
// width (i64mem / i128mem / i256mem). The default load fragment is
// ExtTy # "extloadvi32". Unlike the byte/word source variants, no VEX_WIG is
// attached here.
10281multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10282         SDNode OpNode, SDNode InVecNode, string ExtTy,
10283         X86SchedWriteWidths sched,
10284         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10285
10286  let Predicates = [HasVLX, HasAVX512] in {
10287    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10288                   v4i32x_info, i64mem, LdFrag, InVecNode>,
10289                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10290
10291    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10292                   v4i32x_info, i128mem, LdFrag, OpNode>,
10293                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10294  }
10295  let Predicates = [HasAVX512] in {
10296    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10297                   v8i32x_info, i256mem, LdFrag, OpNode>,
10298                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
10299  }
10300}
10301
// Zero-extending moves, opcodes 0x30-0x35. ExtTy "z" makes the default load
// fragment zextloadvi8/16/32. VPMOVZXBQ takes only the in-vector node because
// avx512_pmovx_bq has no OpNode parameter.
10302defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10303defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10304defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10305defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10306defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10307defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10308
// Sign-extending moves, opcodes 0x20-0x25. ExtTy "s" makes the default load
// fragment sextloadvi8/16/32. VPMOVSXBQ takes only the in-vector node because
// avx512_pmovx_bq has no OpNode parameter.
10309defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10310defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10311defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10312defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10313defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10314defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10315
10316
10317// Patterns that we also need any extend versions of. aext_vector_inreg
10318// is currently legalized to zext_vector_inreg.
//
// Folds a full-vector load feeding ExtOp into the memory ("rm") form of the
// extend instruction whose name is OpcPrefix followed by the element-size
// pair and vector-width suffix (e.g. OpcPrefix # BWZ256rm).
//   OpcPrefix - instruction name prefix.
//   ExtOp     - extend node applied to the loaded vector.
10319multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10320  // 256-bit patterns
10321  let Predicates = [HasVLX, HasBWI] in {
10322    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10323              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10324  }
10325
10326  let Predicates = [HasVLX] in {
10327    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10328              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10329
10330    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10331              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10332  }
10333
10334  // 512-bit patterns
10335  let Predicates = [HasBWI] in {
10336    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10337              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10338  }
10339  let Predicates = [HasAVX512] in {
10340    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10341              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10342    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10343              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10344
10345    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10346              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10347
10348    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10349              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10350  }
10351}
10352
// Load-folding patterns for the in-vector extends, added on top of the
// full-vector-load patterns inherited from AVX512_pmovx_patterns_base.
//   OpcPrefix - instruction name prefix (the element-size pair, width suffix
//               and "rm" are appended).
//   ExtOp     - full-vector extend node, forwarded to the base multiclass.
//   InVecOp   - in-vector extend node matched here.
// Each pattern matches InVecOp applied to a bitcast of a narrow scalar load
// that was placed in a 128-bit vector (scalar_to_vector of an integer or f64
// load, or an X86vzload) and selects the memory form of the instruction.
// For the f64 variants the scalar_to_vector result type is v2f64, since the
// scalar being inserted is an f64.
10353multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10354                                 SDNode InVecOp> :
10355    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10356  // 128-bit patterns
10357  let Predicates = [HasVLX, HasBWI] in {
10358  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10359            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10360  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10361            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10362  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10363            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10364  }
10365  let Predicates = [HasVLX] in {
10366  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10367            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10368  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10369            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10370
10371  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10372            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10373
10374  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10375            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10376  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10377            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10378  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10379            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10380
10381  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10382            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10383  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10384            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10385
10386  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10387            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10388  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10389            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10390  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10391            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10392  }
  // 256-bit patterns
10393  let Predicates = [HasVLX] in {
10394  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10395            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10396  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10397            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10398  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10399            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10400
10401  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10402            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10403  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10404            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10405
10406  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10407            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10408  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10409            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10410  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10411            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10412  }
10413  // 512-bit patterns
10414  let Predicates = [HasAVX512] in {
10415  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10416            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10417  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10418            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10419  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10420            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10421  }
10422}
10423
// Instantiate the load-folding patterns for the sign- and zero-extending
// instruction families.
10424defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10425defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10426
10427// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10428// ext+trunc aggressively making it impossible to legalize the DAG to this
10429// pattern directly.
// The truncate is therefore selected as a two-step sequence: zero-extend the
// words to v16i32 (VPMOVZXWDZ, register or load-folded form) and then
// truncate the doublewords to bytes with VPMOVDBZrr.
10430let Predicates = [HasAVX512, NoBWI] in {
10431def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10432         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10433def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10434         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10435}
10436
10437//===----------------------------------------------------------------------===//
10438// GATHER - SCATTER Operations
10439
10440// FIXME: Improve scheduling of gather/scatter instructions.
//
// Defines the single "rm" form of a gather instruction.
//   opc / OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended.
//   _               - type info of the gathered vector.
//   memop           - vector-indexed memory operand.
//   MaskRC          - mask register class; defaults to _'s write-mask class.
// Operand constraints: $dst is early-clobber and tied to $src1, and the input
// $mask is tied to the second output $mask_wb. No selection pattern is
// attached (empty pattern list).
10441multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10442                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10443  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10444      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10445  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10446            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10447            !strconcat(OpcodeStr#_.Suffix,
10448            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10449            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10450            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10451}
10452
// Gathers of 64-bit elements (instantiated below with f64 and i64 infos).
//   dopc / qopc - opcodes of the "d" and "q" mnemonic variants.
//   _           - 128/256/512-bit type info triple for the element type.
//   SUFF        - name suffix inserted between the D/Q letter and the width.
// All forms carry VEX_W. The D and Q variants of each width use the same
// vector info and differ only in the memory operand (e.g. vy512xmem vs
// vz512mem at 512 bits). The 128/256-bit forms require VLX.
10453multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10454                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10455  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10456                                      vy512xmem>, EVEX_V512, VEX_W;
10457  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10458                                      vz512mem>, EVEX_V512, VEX_W;
10459let Predicates = [HasVLX] in {
10460  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10461                              vx256xmem>, EVEX_V256, VEX_W;
10462  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10463                              vy256xmem>, EVEX_V256, VEX_W;
10464  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10465                              vx128xmem>, EVEX_V128, VEX_W;
10466  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10467                              vx128xmem>, EVEX_V128, VEX_W;
10468}
10469}
10470
// Gathers of 32-bit elements (instantiated below with f32 and i32 infos).
//   dopc / qopc - opcodes of the "d" and "q" mnemonic variants.
//   _           - 128/256/512-bit type info triple for the element type.
//   SUFF        - name suffix inserted between the D/Q letter and the width.
// No VEX_W is attached. The Q variants use the next narrower vector info than
// their EVEX vector length (info256 at EVEX_V512, info128 at EVEX_V256), and
// the 128-bit Q form additionally overrides the mask class to VK2WM.
// NOTE(review): presumably because 64-bit indices address half as many 32-bit
// elements per vector length - confirm against the ISA reference.
10471multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10472                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10473  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10474                                       EVEX_V512;
10475  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10476                                       EVEX_V512;
10477let Predicates = [HasVLX] in {
10478  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10479                                          vy256xmem>, EVEX_V256;
10480  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10481                                          vy128xmem>, EVEX_V256;
10482  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10483                                          vx128xmem>, EVEX_V128;
10484  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10485                                          vx64xmem, VK2WM>, EVEX_V128;
10486}
10487}
10488
10489
// Floating-point gathers (opcodes 0x92 / 0x93): the PD records come from
// avx512_gather_q_pd and the PS records from avx512_gather_d_ps.
10490defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10491               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10492
// Integer gathers (opcodes 0x90 / 0x91): Q-suffixed records for 64-bit
// elements and D-suffixed records for 32-bit elements.
10493defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10494                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10495
// Defines the single "mr" form of a scatter instruction.
//   opc / OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended.
//   _               - type info of the stored vector.
//   memop           - vector-indexed memory operand (the destination).
//   MaskRC          - mask register class; defaults to _'s write-mask class.
// The input $mask is tied to the only output, $mask_wb. No selection pattern
// is attached (empty pattern list).
10496multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10497                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10498
10499let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10500    hasSideEffects = 0 in
10501
10502  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10503            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10504            !strconcat(OpcodeStr#_.Suffix,
10505            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10506            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10507            Sched<[WriteStore]>;
10508}
10509
// Scatters of 64-bit elements (instantiated below with f64 and i64 infos).
// Mirrors the gather counterpart: same parameter meaning, same vector infos
// and memory operands per width, VEX_W on every form, and VLX required for
// the 128/256-bit forms.
10510multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10511                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10512  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10513                                      vy512xmem>, EVEX_V512, VEX_W;
10514  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10515                                      vz512mem>, EVEX_V512, VEX_W;
10516let Predicates = [HasVLX] in {
10517  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10518                              vx256xmem>, EVEX_V256, VEX_W;
10519  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10520                              vy256xmem>, EVEX_V256, VEX_W;
10521  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10522                              vx128xmem>, EVEX_V128, VEX_W;
10523  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10524                              vx128xmem>, EVEX_V128, VEX_W;
10525}
10526}
10527
// Scatters of 32-bit elements (instantiated below with f32 and i32 infos).
// As in the gather counterpart, the Q variants use the next narrower vector
// info than their EVEX vector length (info256 at EVEX_V512, info128 at
// EVEX_V256) and the 128-bit Q form overrides the mask class to VK2WM.
// No VEX_W is attached.
10528multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10529                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10530  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10531                                       EVEX_V512;
10532  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10533                                       EVEX_V512;
10534let Predicates = [HasVLX] in {
10535  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10536                                          vy256xmem>, EVEX_V256;
10537  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10538                                          vy128xmem>, EVEX_V256;
10539  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10540                                          vx128xmem>, EVEX_V128;
10541  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10542                                          vx64xmem, VK2WM>, EVEX_V128;
10543}
10544}
10545
// Floating-point scatters (opcodes 0xA2 / 0xA3): the PD records come from
// avx512_scatter_q_pd and the PS records from avx512_scatter_d_ps.
10546defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10547               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10548
// Integer scatters (opcodes 0xA0 / 0xA1): Q-suffixed records for 64-bit
// elements and D-suffixed records for 32-bit elements.
10549defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10550                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10551
10552// prefetch
//
// Defines the single "m" form shared by the gather and scatter prefetch
// instructions.
//   opc       - opcode byte.
//   F         - instruction format; the instantiations pass MRM1m/MRM2m/
//               MRM5m/MRM6m to distinguish the variants sharing an opcode.
//   OpcodeStr - full mnemonic.
//   KRC       - mask register class of the $mask operand.
//   memop     - vector-indexed memory operand.
// The instruction has no outputs and no selection pattern, is predicated on
// HasPFI, and is marked as both mayLoad and mayStore.
10553multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10554                       RegisterClass KRC, X86MemOperand memop> {
10555  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10556  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10557            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10558            EVEX, EVEX_K, Sched<[WriteLoad]>;
10559}
10560
// Gather prefetches. The "pf0" variants use format MRM1m and the "pf1"
// variants MRM2m. Opcode 0xC6 is used for the D-index forms and 0xC7 for the
// Q-index forms; the PD forms add VEX_W. Only the DPS forms use a 16-bit mask
// class (VK16WM); all others use VK8WM.
10561defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10562                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10563
10564defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10565                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10566
10567defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10568                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10569
10570defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10571                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10572
10573defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10574                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10575
10576defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10577                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10578
10579defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10580                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10581
10582defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10583                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10584
// Scatter prefetches. The "pf0" variants use format MRM5m and the "pf1"
// variants MRM6m. Opcodes, mask classes, memory operands and VEX_W usage
// follow the same D/Q and PS/PD scheme as the gather prefetches.
10585defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10586                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10587
10588defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10589                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10590
10591defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10592                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10593
10594defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10595                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10596
10597defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10598                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10599
10600defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10601                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10602
10603defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10604                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10605
10606defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10607                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10608
// Mask-register -> vector conversion, register form only.
//   opc       - opcode byte.
//   Vec       - type info of the destination vector; its KRC is the source
//               mask register class and its Suffix is appended to OpcodeStr.
//   OpcodeStr - mnemonic stem.
//   Sched     - scheduling class.
// The selection pattern is a sign extension of the mask value to Vec.VT.
10609multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10610def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10611                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10612                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10613                  EVEX, Sched<[Sched]>;
10614}
10615
// Instantiates cvt_by_vec_width for the 512-, 256- and 128-bit vector infos
// of VTInfo. The 512-bit form requires only `prd`; the 256/128-bit forms
// additionally require VLX. Each width uses its own move scheduling class
// (WriteVecMoveZ / WriteVecMoveY / WriteVecMoveX).
10616multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10617                                 string OpcodeStr, Predicate prd> {
10618let Predicates = [prd] in
10619  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10620
10621  let Predicates = [prd, HasVLX] in {
10622    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10623    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10624  }
10625}
10626
// Mask -> vector moves per element size. The shared stem "vpmovm2" is
// completed by the vector info's Suffix. Byte/word forms use opcode 0x28 and
// require BWI; dword/qword forms use opcode 0x38 and require DQI. The word and
// qword forms add VEX_W.
10627defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10628defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
10629defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10630defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
10631
// Vector -> mask-register conversion, register form only.
//   opc       - opcode byte.
//   _         - type info of the source vector; the result is in _.KRC.
//   OpcodeStr - full mnemonic.
// The selection pattern is X86pcmpgtm with the all-zeros vector as first
// operand and the source as second operand.
// NOTE(review): i.e. presumably a mask bit is set where 0 > element (the
// element is negative) - confirm against the X86pcmpgtm definition.
10632multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10633    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10634                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10635                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10636                        EVEX, Sched<[WriteMove]>;
10637}
10638
10639// Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - type info of the wide (512-bit) vector the source is
//                inserted into.
//   _          - type info of the narrow source vector.
//   Name       - instruction name prefix; "Zrr" is appended.
// The narrow source is inserted at _.SubRegIdx into an undefined ExtendInfo
// value, the wide instruction produces a mask, and that mask is copied to
// the narrow type's mask register class (_.KRC).
10640multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10641                                           X86VectorVTInfo _,
10642                                           string Name> {
10643
10644  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10645            (_.KVT (COPY_TO_REGCLASS
10646                     (!cast<Instruction>(Name#"Zrr")
10647                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10648                                      _.RC:$src, _.SubRegIdx)),
10649                   _.KRC))>;
10650}
10651
// Top-level vector-to-mask family for one element width.
//   [prd]         - defines the 512-bit instruction (Z).
//   [prd, HasVLX] - defines the native 256-bit and 128-bit instructions.
//   [prd, NoVLX]  - defines no instructions; instead adds patterns
//                   (Z256_Alt / Z128_Alt) that select the 512-bit instruction
//                   for 256-bit and 128-bit sources.
10652multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10653                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10654  let Predicates = [prd] in
10655    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10656                                            EVEX_V512;
10657
10658  let Predicates = [prd, HasVLX] in {
10659    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10660                                              EVEX_V256;
10661    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10662                                               EVEX_V128;
10663  }
10664  let Predicates = [prd, NoVLX] in {
10665    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10666    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10667  }
10668}
10669
// Vector-to-mask families, one per element width. The byte/word forms use
// opcode 0x29 and require HasBWI; the dword/qword forms use opcode 0x39 and
// require HasDQI. VEX_W is added to the word and qword forms.
10670defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10671                                              avx512vl_i8_info, HasBWI>;
10672defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10673                                              avx512vl_i16_info, HasBWI>, VEX_W;
10674defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10675                                              avx512vl_i32_info, HasDQI>;
10676defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10677                                              avx512vl_i64_info, HasDQI>, VEX_W;
10678
10679// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10680// is available, but BWI is not. We can't handle this in lowering because
10681// a target independent DAG combine likes to combine sext and trunc.
// Both patterns first produce a v16i32 from the mask with VPMOVM2DZrr and
// then feed it to VPMOVDBZrr (v16i8 result) or VPMOVDWZrr (v16i16 result).
10682let Predicates = [HasDQI, NoBWI] in {
10683  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10684            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10685  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10686            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10687}
10688
// Same two-step scheme for v8i1 -> v8i16 when VLX is also available: the mask
// becomes a v8i32 via VPMOVM2DZ256rr, which is then fed to VPMOVDWZ256rr.
10689let Predicates = [HasDQI, NoBWI, HasVLX] in {
10690  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10691            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10692}
10693
10694//===----------------------------------------------------------------------===//
10695// AVX-512 - COMPRESS and EXPAND
10696//
10697
// Defines the instruction forms of a compress operation for one vector type:
//   rr  - maskable register-to-register form (MRMDestReg). It is given
//         null_frag, so no selection pattern is attached here.
//   mr  - store form with memory destination and no mask operand.
//   mrk - store form with memory destination and a write-mask (EVEX_K).
// Both store forms have empty pattern lists and use sched.Folded.
10698multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10699                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10700  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10701              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10702              (null_frag)>, AVX5128IBase,
10703              Sched<[sched]>;
10704
  // This `let` has no braces, so it applies to `mr` only, not to `mrk`.
10705  let mayStore = 1, hasSideEffects = 0 in
10706  def mr : AVX5128I<opc, MRMDestMem, (outs),
10707              (ins _.MemOp:$dst, _.RC:$src),
10708              OpcodeStr # "\t{$src, $dst|$dst, $src}",
10709              []>, EVEX_CD8<_.EltSize, CD8VT1>,
10710              Sched<[sched.Folded]>;
10711
10712  def mrk : AVX5128I<opc, MRMDestMem, (outs),
10713              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10714              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10715              []>,
10716              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10717              Sched<[sched.Folded]>;
10718}
10719
// Selection patterns for the compress instructions. Each pattern looks up
// its target record by name as Name # _.ZSuffix # <form>:
//   X86mCompressingStore src, addr, mask   -> <form> = mrk  (masked store)
//   X86compress src, passthru-reg, mask    -> <form> = rrk  (merge-masking)
//   X86compress src, all-zeros, mask       -> <form> = rrkz (zero-masking)
10720multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10721  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10722            (!cast<Instruction>(Name#_.ZSuffix#mrk)
10723                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10724
10725  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10726            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10727                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10728  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10729            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10730                            _.KRCWM:$mask, _.RC:$src)>;
10731}
10732
// Builds the compress instructions and their selection patterns for all
// three vector widths of VTInfo. Each `defm` combines the instruction
// multiclass with the lowering multiclass for the same type info.
//   Pred - feature predicate; defaults to HasAVX512. The 256-bit and 128-bit
//          widths additionally require HasVLX.
10733multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10734                                 X86FoldableSchedWrite sched,
10735                                 AVX512VLVectorVTInfo VTInfo,
10736                                 Predicate Pred = HasAVX512> {
10737  let Predicates = [Pred] in
10738  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10739           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10740
10741  let Predicates = [Pred, HasVLX] in {
10742    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10743                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10744    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10745                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10746  }
10747}
10748
10749// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer forms use opcode 0x8B and floating-point forms use 0x8A; the
// 64-bit element variants add VEX_W. All four are marked NotMemoryFoldable
// and rely on the default predicate (HasAVX512) of compress_by_elt_width.
10750defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10751                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10752defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10753                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10754defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10755                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10756defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10757                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
10758
10759// expand
// Defines the instruction forms of an expand operation for one vector type:
//   rr - maskable register source form (MRMSrcReg).
//   rm - maskable memory source form (MRMSrcMem).
// Both are given null_frag, so no selection pattern is attached here.
10760multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10761                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10762  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10763              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10764              (null_frag)>, AVX5128IBase,
10765              Sched<[sched]>;
10766
10767  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10768              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10769              (null_frag)>,
10770            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10771            Sched<[sched.Folded, sched.ReadAfterFold]>;
10772}
10773
// Selection patterns for the expand instructions. Each pattern looks up its
// target record by name as Name # _.ZSuffix # <form>:
//   X86mExpandingLoad with undef passthru     -> rmkz
//   X86mExpandingLoad with all-zeros passthru -> rmkz
//   X86mExpandingLoad with register passthru  -> rmk
//   X86expand with register passthru          -> rrk
//   X86expand with all-zeros passthru         -> rrkz
10774multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10775
10776  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10777            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10778                                        _.KRCWM:$mask, addr:$src)>;
10779
10780  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10781            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10782                                        _.KRCWM:$mask, addr:$src)>;
10783
10784  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10785                                               (_.VT _.RC:$src0))),
10786            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10787                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10788
10789  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10790            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10791                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10792  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10793            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10794                            _.KRCWM:$mask, _.RC:$src)>;
10795}
10796
// Builds the expand instructions and their selection patterns for all three
// vector widths of VTInfo. Each `defm` combines the instruction multiclass
// with the lowering multiclass for the same type info.
//   Pred - feature predicate; defaults to HasAVX512. The 256-bit and 128-bit
//          widths additionally require HasVLX.
10797multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10798                               X86FoldableSchedWrite sched,
10799                               AVX512VLVectorVTInfo VTInfo,
10800                               Predicate Pred = HasAVX512> {
10801  let Predicates = [Pred] in
10802  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10803           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10804
10805  let Predicates = [Pred, HasVLX] in {
10806    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10807                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10808    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10809                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10810  }
10811}
10812
10813// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer forms use opcode 0x89 and floating-point forms use 0x88; the
// 64-bit element variants add VEX_W. All four rely on the default predicate
// (HasAVX512) of expand_by_elt_width.
10814defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10815                                      avx512vl_i32_info>, EVEX;
10816defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10817                                      avx512vl_i64_info>, EVEX, VEX_W;
10818defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10819                                      avx512vl_f32_info>, EVEX;
10820defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10821                                      avx512vl_f64_info>, EVEX, VEX_W;
10822
10823//handle instruction  reg_vec1 = op(reg_vec,imm)
10824//                               op(mem_vec,imm)
10825//                               op(broadcast(eltVt),imm)
10826//all instruction created with FROUND_CURRENT
// Forms defined:
//   rri  - register source.
//   rmi  - full-vector memory source, loaded through _.LdFrag.
//   rmbi - scalar memory source loaded through _.BroadcastLdFrag (EVEX_B).
// Each form is given two DAG fragments, one built from OpNode and one from
// MaskOpNode. NOTE(review): presumably AVX512_maskable_split uses the first
// for the unmasked pattern and the second for the masked ones -- confirm at
// its definition.
// The mnemonic is OpcodeStr with _.Suffix appended. All forms are marked as
// using MXCSR and as possibly raising FP exceptions.
10827multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10828                                      SDPatternOperator OpNode,
10829                                      SDPatternOperator MaskOpNode,
10830                                      X86FoldableSchedWrite sched,
10831                                      X86VectorVTInfo _> {
10832  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10833  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10834                      (ins _.RC:$src1, i32u8imm:$src2),
10835                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10836                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10837                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10838                      Sched<[sched]>;
10839  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10840                    (ins _.MemOp:$src1, i32u8imm:$src2),
10841                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10842                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10843                            (i32 timm:$src2)),
10844                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10845                                (i32 timm:$src2))>,
10846                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10847  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10848                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10849                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10850                    "${src1}"#_.BroadcastStr#", $src2",
10851                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10852                            (i32 timm:$src2)),
10853                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10854                                (i32 timm:$src2))>, EVEX_B,
10855                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10856  }
10857}
10858
10859//handle instruction  reg_vec1 = op(reg_vec,imm),{sae}
// Defines only the register form (rrib) of the unary operation with the
// "{sae}" marker in its assembly string and EVEX_B set. It is marked as
// using MXCSR; unlike avx512_unary_fp_packed_imm it does not set
// mayRaiseFPException. The mnemonic is OpcodeStr with _.Suffix appended.
10860multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10861                                          SDNode OpNode, X86FoldableSchedWrite sched,
10862                                          X86VectorVTInfo _> {
10863  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10864  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10865                      (ins _.RC:$src1, i32u8imm:$src2),
10866                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10867                      "$src1, {sae}, $src2",
10868                      (OpNode (_.VT _.RC:$src1),
10869                              (i32 timm:$src2))>,
10870                      EVEX_B, Sched<[sched]>;
10871}
10872
// Instantiates a unary packed FP-with-immediate operation at all three
// vector widths of `_`.
//   [prd]         - 512-bit (Z): the regular forms plus the {sae} form,
//                   which uses OpNodeSAE.
//   [prd, HasVLX] - 128-bit and 256-bit: regular forms only; no {sae} form
//                   is defined for these widths.
// Scheduling classes are taken per width from sched.ZMM/XMM/YMM.
10873multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10874            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10875            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10876            Predicate prd>{
10877  let Predicates = [prd] in {
10878    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10879                                           sched.ZMM, _.info512>,
10880                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10881                                               sched.ZMM, _.info512>, EVEX_V512;
10882  }
10883  let Predicates = [prd, HasVLX] in {
10884    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10885                                           sched.XMM, _.info128>, EVEX_V128;
10886    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10887                                           sched.YMM, _.info256>, EVEX_V256;
10888  }
10889}
10890
10891//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10892//                               op(reg_vec2,mem_vec,imm)
10893//                               op(reg_vec2,broadcast(eltVt),imm)
10894//all instruction created with FROUND_CURRENT
// Forms defined (the first source is always a register):
//   rri  - second source is a register.
//   rmi  - second source is a full-vector memory operand (_.LdFrag).
//   rmbi - second source is a scalar memory operand loaded through
//          _.BroadcastLdFrag (EVEX_B).
// The immediate is an i32u8imm operand matched as (i32 timm). All forms are
// marked as using MXCSR and as possibly raising FP exceptions.
10895multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10896                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10897  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10898  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10899                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10900                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10901                      (OpNode (_.VT _.RC:$src1),
10902                              (_.VT _.RC:$src2),
10903                              (i32 timm:$src3))>,
10904                      Sched<[sched]>;
10905  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10906                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10907                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10908                    (OpNode (_.VT _.RC:$src1),
10909                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10910                            (i32 timm:$src3))>,
10911                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10912  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10913                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10914                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10915                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10916                    (OpNode (_.VT _.RC:$src1),
10917                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10918                            (i32 timm:$src3))>, EVEX_B,
10919                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10920  }
10921}
10922
10923//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10924//                               op(reg_vec2,mem_vec,imm)
// The destination and source type infos are separate parameters, so the
// result type (DestInfo.VT) may differ from the operand type (SrcInfo.VT).
// Masking is driven by DestInfo. The immediate is a u8imm operand matched
// as (i8 timm). Only rri and rmi are defined; there is no broadcast form.
10925multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10926                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10927                              X86VectorVTInfo SrcInfo>{
10928  let ExeDomain = DestInfo.ExeDomain in {
10929  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10930                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10931                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10932                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10933                               (SrcInfo.VT SrcInfo.RC:$src2),
10934                               (i8 timm:$src3)))>,
10935                  Sched<[sched]>;
10936  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10937                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10938                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10939                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10940                             (SrcInfo.VT (bitconvert
10941                                                (SrcInfo.LdFrag addr:$src2))),
10942                             (i8 timm:$src3)))>,
10943                Sched<[sched.Folded, sched.ReadAfterFold]>;
10944  }
10945}
10946
10947//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10948//                               op(reg_vec2,mem_vec,imm)
10949//                               op(reg_vec2,broadcast(eltVt),imm)
// Inherits rri and rmi from avx512_3Op_rm_imm8 with the same type info used
// for both destination and source, and adds the broadcast memory form
// (rmbi, EVEX_B).
10950multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10951                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10952  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10953
10954  let ExeDomain = _.ExeDomain in
10955  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10956                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10957                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10958                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10959                    (OpNode (_.VT _.RC:$src1),
10960                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10961                            (i8 timm:$src3))>, EVEX_B,
10962                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10963}
10964
10965//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10966//                                      op(reg_vec2,mem_scalar,imm)
// Both forms are built with AVX512_maskable_scalar. The memory form (rmi)
// takes an _.IntScalarMemOp operand and matches it with
// _.ScalarIntMemFrags. The immediate is an i32u8imm operand matched as
// (i32 timm). Both forms are marked as using MXCSR and as possibly raising
// FP exceptions.
10967multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10968                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10969  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10970  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10971                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10972                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10973                      (OpNode (_.VT _.RC:$src1),
10974                              (_.VT _.RC:$src2),
10975                              (i32 timm:$src3))>,
10976                      Sched<[sched]>;
10977  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10978                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10979                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10980                    (OpNode (_.VT _.RC:$src1),
10981                            (_.ScalarIntMemFrags addr:$src2),
10982                            (i32 timm:$src3))>,
10983                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10984  }
10985}
10986
10987//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Defines only the register form (rrib) with the "{sae}" marker in its
// assembly string and EVEX_B set. It is marked as using MXCSR; unlike
// avx512_fp_packed_imm it does not set mayRaiseFPException.
10988multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10989                                    SDNode OpNode, X86FoldableSchedWrite sched,
10990                                    X86VectorVTInfo _> {
10991  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10992  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10993                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10994                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10995                      "$src1, $src2, {sae}, $src3",
10996                      (OpNode (_.VT _.RC:$src1),
10997                              (_.VT _.RC:$src2),
10998                              (i32 timm:$src3))>,
10999                      EVEX_B, Sched<[sched]>;
11000}
11001
11002//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of the {sae} register form, built with
// AVX512_maskable_scalar. It is marked as using MXCSR and does not set
// mayRaiseFPException. Note that the defm name is spelled with an explicit
// NAME# prefix here, unlike the sibling multiclasses in this section.
11003multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11004                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11005  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
11006  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11007                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
11008                      OpcodeStr, "$src3, {sae}, $src2, $src1",
11009                      "$src1, $src2, {sae}, $src3",
11010                      (OpNode (_.VT _.RC:$src1),
11011                              (_.VT _.RC:$src2),
11012                              (i32 timm:$src3))>,
11013                      EVEX_B, Sched<[sched]>;
11014}
11015
// Instantiates a two-source packed FP-with-immediate operation at all three
// vector widths of `_`.
//   [prd]         - 512-bit (Z): the regular forms plus the {sae} form,
//                   which uses OpNodeSAE.
//   [prd, HasVLX] - 128-bit and 256-bit: regular forms only.
11016multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
11017            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
11018            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
11019  let Predicates = [prd] in {
11020    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11021                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
11022                                  EVEX_V512;
11023
11024  }
11025  let Predicates = [prd, HasVLX] in {
11026    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11027                                  EVEX_V128;
11028    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11029                                  EVEX_V256;
11030  }
11031}
11032
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, no broadcast)
// at all three vector widths, pairing DestInfo and SrcInfo width by width.
//   Pred - feature predicate; defaults to HasBWI. The 128-bit and 256-bit
//          widths additionally require HasVLX.
// Every width is tagged AVX512AIi8Base and EVEX_4V.
11033multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
11034                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
11035                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
11036  let Predicates = [Pred] in {
11037    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
11038                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
11039  }
11040  let Predicates = [Pred, HasVLX] in {
11041    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
11042                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
11043    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
11044                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
11045  }
11046}
11047
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) at all
// three vector widths of `_`.
//   Pred - feature predicate; defaults to HasAVX512. The 128-bit and 256-bit
//          widths additionally require HasVLX.
// Only the EVEX vector-length class is attached here; any other encoding
// classes must come from the instantiation site.
11048multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
11049                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
11050                                  Predicate Pred = HasAVX512> {
11051  let Predicates = [Pred] in {
11052    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11053                                EVEX_V512;
11054  }
11055  let Predicates = [Pred, HasVLX] in {
11056    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11057                                EVEX_V128;
11058    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11059                                EVEX_V256;
11060  }
11061}
11062
// Scalar two-source FP-with-immediate operation: combines the regular forms
// (avx512_fp_scalar_imm, using OpNode) with the {sae} register form
// (avx512_fp_sae_scalar_imm, using OpNodeSAE) under a single "Z" prefix.
// Both use sched.XMM and are guarded by `prd` only.
11063multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
11064                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
11065                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
11066  let Predicates = [prd] in {
11067     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
11068              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
11069  }
11070}
11071
// Instantiates a unary packed FP-with-immediate operation for the three
// element types f16 (PH), f32 (PS) and f64 (PD).
//   PH - uses opcPs, is always guarded by HasFP16 (the `prd` argument is not
//        used for it), and is tagged AVX512PSIi8Base, TA.
//   PS - uses opcPs and `prd`, tagged AVX512AIi8Base.
//   PD - uses opcPd and `prd`, tagged AVX512AIi8Base and VEX_W.
// Each variant sets EVEX_CD8 with its element size and CD8VF.
11072multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
11073                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
11074                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
11075                    X86SchedWriteWidths sched, Predicate prd>{
11076  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
11077                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
11078                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
11079  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
11080                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11081                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
11082  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
11083                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11084                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
11085}
11086
// Unary packed FP operations with an immediate, each expanded to PH/PS/PD.
// The two opcode arguments are the PS and PD opcodes: VREDUCE and VGETMANT
// use the same opcode for both, VRNDSCALE uses 0x08 (PS) and 0x09 (PD).
// VRNDSCALE passes X86any_VRndScale as OpNode and X86VRndScale as
// MaskOpNode; the other two pass the same node for both.
// VREDUCE requires HasDQI for PS/PD; the others require HasAVX512.
11087defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
11088                              X86VReduce, X86VReduce, X86VReduceSAE,
11089                              SchedWriteFRnd, HasDQI>;
11090defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
11091                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
11092                              SchedWriteFRnd, HasAVX512>;
11093defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
11094                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
11095                              SchedWriteFRnd, HasAVX512>;
11096
// Packed VRANGE for f64 and f32: opcode 0x50, requires HasDQI, scheduled
// with SchedWriteFAdd. The f64 form adds VEX_W.
11097defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
11098                                                0x50, X86VRange, X86VRangeSAE,
11099                                                SchedWriteFAdd, HasDQI>,
11100      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11101defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11102                                                0x50, X86VRange, X86VRangeSAE,
11103                                                SchedWriteFAdd, HasDQI>,
11104      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11105
// Scalar VRANGE for f64 and f32: opcode 0x51, requires HasDQI, tagged
// VEX_LIG. The f64 form adds VEX_W.
11106defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11107      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11108      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11109defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11110      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11111      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11112
// Scalar VREDUCE for f64, f32 and f16: opcode 0x57, tagged VEX_LIG. The
// f64/f32 forms require HasDQI and use AVX512AIi8Base; the f16 form requires
// HasFP16 and uses AVX512PSIi8Base, TA. The f64 form adds VEX_W.
11113defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11114      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11115      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11116defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11117      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11118      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11119defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11120      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11121      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11122
// Scalar VGETMANT for f64, f32 and f16: opcode 0x27, tagged VEX_LIG. The
// f64/f32 forms require HasAVX512 and use AVX512AIi8Base; the f16 form
// requires HasFP16 and uses AVX512PSIi8Base, TA. The f64 form adds VEX_W.
11123defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11124      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11125      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11126defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11127      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11128      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11129defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11130      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11131      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11132
// Defines the rri/rmi/rmbi forms of a shuffle selected from X86Shuf128.
//   _            - type info of the instruction (operands, masking, result).
//   CastInfo     - type info in which the X86Shuf128 node is matched; the
//                  node's result is bitconverted from CastInfo.VT to _.VT.
//   EVEX2VEXOvrd - name prefix for the EVEX2VEXOverride record; "rr" or "rm"
//                  is appended for the register and memory forms.
// The memory form loads through CastInfo.LdFrag, while the broadcast form
// loads through _.BroadcastLdFrag. The broadcast form carries no
// EVEX2VEXOverride.
11133multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11134                                          X86FoldableSchedWrite sched,
11135                                          X86VectorVTInfo _,
11136                                          X86VectorVTInfo CastInfo,
11137                                          string EVEX2VEXOvrd> {
11138  let ExeDomain = _.ExeDomain in {
11139  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11140                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11141                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11142                  (_.VT (bitconvert
11143                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11144                                                  (i8 timm:$src3)))))>,
11145                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
11146  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11147                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11148                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11149                (_.VT
11150                 (bitconvert
11151                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
11152                                           (CastInfo.LdFrag addr:$src2),
11153                                           (i8 timm:$src3)))))>,
11154                Sched<[sched.Folded, sched.ReadAfterFold]>,
11155                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
11156  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11157                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11158                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11159                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
11160                    (_.VT
11161                     (bitconvert
11162                      (CastInfo.VT
11163                       (X86Shuf128 _.RC:$src1,
11164                                   (_.BroadcastLdFrag addr:$src2),
11165                                   (i8 timm:$src3)))))>, EVEX_B,
11166                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11167  }
11168}
11169
// Instantiates the X86Shuf128-based shuffle at 512-bit (Z, HasAVX512) and
// 256-bit (Z256, HasAVX512 + HasVLX). No 128-bit variant is defined.
// The 512-bit instantiation passes an empty EVEX2VEX override prefix; only
// the 256-bit one forwards EVEX2VEXOvrd.
11170multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11171                                   AVX512VLVectorVTInfo _,
11172                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11173                                   string EVEX2VEXOvrd>{
11174  let Predicates = [HasAVX512] in
11175  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11176                                          _.info512, CastInfo.info512, "">, EVEX_V512;
11177
11178  let Predicates = [HasAVX512, HasVLX] in
11179  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11180                                             _.info256, CastInfo.info256,
11181                                             EVEX2VEXOvrd>, EVEX_V256;
11182}
11183
// 128-bit-lane shuffles. The FP forms use opcode 0x23 with the "VPERM2F128"
// override prefix; the integer forms use opcode 0x43 with "VPERM2I128".
// The 32-bit element forms are matched through the corresponding 64-bit
// element type info (f64 / i64) as CastInfo. The 64x2 forms add VEX_W.
// NOTE(review): the integer forms also use WriteFShuffle256 -- confirm this
// scheduling class is intended for them.
11184defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11185      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11186defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11187      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11188defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11189      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11190defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11191      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11192
// Defines the rri/rmi/rmbi forms of an instruction selected from X86VAlign
// with two vector sources and a u8imm immediate matched as (i8 timm).
// The register and memory forms carry EVEX2VEXOverride records naming
// "VPALIGNRrri" / "VPALIGNRrmi"; the broadcast form (rmbi, EVEX_B) has none.
11193multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11194                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11195  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11196  // instantiation of this class.
11197  let ExeDomain = _.ExeDomain in {
11198  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11199                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11200                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11201                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11202                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
11203  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11204                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11205                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11206                (_.VT (X86VAlign _.RC:$src1,
11207                                 (bitconvert (_.LdFrag addr:$src2)),
11208                                 (i8 timm:$src3)))>,
11209                Sched<[sched.Folded, sched.ReadAfterFold]>,
11210                EVEX2VEXOverride<"VPALIGNRrmi">;
11211
11212  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11213                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11214                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11215                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
11216                   (X86VAlign _.RC:$src1,
11217                              (_.VT (_.BroadcastLdFrag addr:$src2)),
11218                              (i8 timm:$src3))>, EVEX_B,
11219                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11220  }
11221}
11222
// Instantiates avx512_valign (opcode 0x03) at all three vector widths of `_`:
// Z under HasAVX512, Z128 and Z256 under HasAVX512 + HasVLX. Each width picks
// the matching member of `sched` (ZMM / XMM / YMM).
11223multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11224                                AVX512VLVectorVTInfo _> {
11225  let Predicates = [HasAVX512] in {
11226    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11227                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
11228  }
11229  let Predicates = [HasAVX512, HasVLX] in {
11230    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11231                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
11232    // We can't really override the 256-bit version so change it back to unset.
11233    let EVEX2VEXOverride = ? in
11234    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11235                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
11236  }
11237}
11238
// VALIGND / VALIGNQ: element-granular align on i32 / i64 vectors; the i64
// form additionally sets VEX_W. Both use SchedWriteShuffle.
11239defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11240                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11241defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11242                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11243                                   VEX_W;
11244
// VPALIGNR: opcode 0x0F, X86PAlignr node, with i8 vector info for both the
// destination and source type arguments.
11245defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11246                                         SchedWriteShuffle, avx512vl_i8_info,
11247                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11248
11249// Fragments to help convert valignq into masked valignd. Or valignq/valignd
11250// into vpalignr.
// Each transform rescales the immediate by the ratio of the element sizes and
// returns it as an i8 immediate:
//   ValignqImm32XForm: imm * 2  (64-bit elements -> 32-bit elements)
//   ValignqImm8XForm:  imm * 8  (64-bit elements -> bytes)
//   ValigndImm8XForm:  imm * 4  (32-bit elements -> bytes)
11251def ValignqImm32XForm : SDNodeXForm<timm, [{
11252  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11253}]>;
11254def ValignqImm8XForm : SDNodeXForm<timm, [{
11255  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11256}]>;
11257def ValigndImm8XForm : SDNodeXForm<timm, [{
11258  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11259}]>;
11260
// Patterns that select a masked instruction of type `To` for an OpNode that
// was built on type `From` and then bitcast to `To` under a vselect_mask.
// OpcodeStr is the instruction name prefix; the suffixes used here are
//   rrik / rrikz - register source, merge-masked / zero-masked,
//   rmik / rmikz - From.LdFrag memory source, merge-masked / zero-masked.
// The immediate is rewritten with ImmXForm in every output pattern.
11261multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11262                                        X86VectorVTInfo From, X86VectorVTInfo To,
11263                                        SDNodeXForm ImmXForm> {
  // Register source, merge-masking with $src0 as the pass-through value.
11264  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11265                                 (bitconvert
11266                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11267                                                   timm:$src3))),
11268                                 To.RC:$src0)),
11269            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11270                                                  To.RC:$src1, To.RC:$src2,
11271                                                  (ImmXForm timm:$src3))>;
11272
  // Register source, zero-masking (false operand is To.ImmAllZerosV).
11273  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11274                                 (bitconvert
11275                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11276                                                   timm:$src3))),
11277                                 To.ImmAllZerosV)),
11278            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11279                                                   To.RC:$src1, To.RC:$src2,
11280                                                   (ImmXForm timm:$src3))>;
11281
  // Memory source, merge-masking.
11282  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11283                                 (bitconvert
11284                                  (From.VT (OpNode From.RC:$src1,
11285                                                   (From.LdFrag addr:$src2),
11286                                           timm:$src3))),
11287                                 To.RC:$src0)),
11288            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11289                                                  To.RC:$src1, addr:$src2,
11290                                                  (ImmXForm timm:$src3))>;
11291
  // Memory source, zero-masking.
11292  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11293                                 (bitconvert
11294                                  (From.VT (OpNode From.RC:$src1,
11295                                                   (From.LdFrag addr:$src2),
11296                                           timm:$src3))),
11297                                 To.ImmAllZerosV)),
11298            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11299                                                   To.RC:$src1, addr:$src2,
11300                                                   (ImmXForm timm:$src3))>;
11301}
11302
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// broadcast load of type `To` (To.BroadcastLdFrag) bitcast to `From`. Adds an
// unmasked form (rmbi) plus merge-masked (rmbik) and zero-masked (rmbikz)
// forms; the immediate is rewritten with ImmXForm in each.
11303multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11304                                           X86VectorVTInfo From,
11305                                           X86VectorVTInfo To,
11306                                           SDNodeXForm ImmXForm> :
11307      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked: the result type of the matched node is From.VT.
11308  def : Pat<(From.VT (OpNode From.RC:$src1,
11309                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11310                             timm:$src3)),
11311            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11312                                                  (ImmXForm timm:$src3))>;
11313
  // Merge-masked with $src0 as the pass-through value.
11314  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11315                                 (bitconvert
11316                                  (From.VT (OpNode From.RC:$src1,
11317                                           (bitconvert
11318                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11319                                           timm:$src3))),
11320                                 To.RC:$src0)),
11321            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11322                                                   To.RC:$src1, addr:$src2,
11323                                                   (ImmXForm timm:$src3))>;
11324
  // Zero-masked.
11325  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11326                                 (bitconvert
11327                                  (From.VT (OpNode From.RC:$src1,
11328                                           (bitconvert
11329                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11330                                           timm:$src3))),
11331                                 To.ImmAllZerosV)),
11332            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11333                                                    To.RC:$src1, addr:$src2,
11334                                                    (ImmXForm timm:$src3))>;
11335}
11336
11337let Predicates = [HasAVX512] in {
11338  // For 512-bit we lower to the widest element type we can. So we only need
11339  // to handle converting valignq to valignd.
  // The X86VAlign node is typed v8i64 and the selected instruction family is
  // VALIGNDZ on v16i32; the immediate is doubled by ValignqImm32XForm.
11340  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11341                                         v16i32_info, ValignqImm32XForm>;
11342}
11343
// 128-bit and 256-bit counterparts of the valignq -> valignd lowering, using
// the VALIGNDZ128 / VALIGNDZ256 instruction families.
11344let Predicates = [HasVLX] in {
11345  // For 128-bit we lower to the widest element type we can. So we only need
11346  // to handle converting valignq to valignd.
11347  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11348                                         v4i32x_info, ValignqImm32XForm>;
11349  // For 256-bit we lower to the widest element type we can. So we only need
11350  // to handle converting valignq to valignd.
11351  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11352                                         v8i32x_info, ValignqImm32XForm>;
11353}
11354
11355let Predicates = [HasVLX, HasBWI] in {
11356  // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR. Only the VPALIGNRZ128
  // forms are instantiated here; there is no 256-bit instantiation. The
  // immediate is scaled to a byte count (x8 for valignq, x4 for valignd).
11357  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11358                                      v16i8x_info, ValignqImm8XForm>;
11359  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11360                                      v16i8x_info, ValigndImm8XForm>;
11361}
11362
// VDBPSADBW: opcode 0x42, X86dbpsadbw node, with i16 and i8 vector infos as
// the two type arguments. Explicitly marked NotEVEX2VEXConvertible.
11363defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11364                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11365                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11366
// Maskable single-operand instruction with a register form (rr) and a
// full-vector memory form (rm, loaded with _.LdFrag). Both are EVEX /
// AVX5128IBase; rm additionally sets EVEX_CD8<_.EltSize, CD8VF> and is
// scheduled with sched.Folded only.
11367multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11368                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11369  let ExeDomain = _.ExeDomain in {
11370  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11371                    (ins _.RC:$src1), OpcodeStr,
11372                    "$src1", "$src1",
11373                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11374                    Sched<[sched]>;
11375
11376  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11377                  (ins _.MemOp:$src1), OpcodeStr,
11378                  "$src1", "$src1",
11379                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11380            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11381            Sched<[sched.Folded]>;
11382  }
11383}
11384
// avx512_unary_rm plus a broadcast-memory form (rmb): the operand is a scalar
// memory operand loaded with _.BroadcastLdFrag, EVEX_B is set, and the
// assembly operand string gets _.BroadcastStr appended.
// NOTE(review): unlike rr/rm in avx512_unary_rm, rmb is not wrapped in
// `let ExeDomain = _.ExeDomain` here - confirm whether that is intentional.
11385multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11386                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11387           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11388  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11389                  (ins _.ScalarMemOp:$src1), OpcodeStr,
11390                  "${src1}"#_.BroadcastStr,
11391                  "${src1}"#_.BroadcastStr,
11392                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11393             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11394             Sched<[sched.Folded]>;
11395}
11396
// Instantiates avx512_unary_rm (no broadcast form) at all three widths of
// VTInfo: Z requires `prd`; Z256 and Z128 require `prd` and HasVLX.
11397multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11398                              X86SchedWriteWidths sched,
11399                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11400  let Predicates = [prd] in
11401    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11402                             EVEX_V512;
11403
11404  let Predicates = [prd, HasVLX] in {
11405    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11406                              EVEX_V256;
11407    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11408                              EVEX_V128;
11409  }
11410}
11411
// Same width expansion as avx512_unary_rm_vl, but built on avx512_unary_rmb so
// every width also gets the broadcast-memory (rmb) form.
11412multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11413                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11414                               Predicate prd> {
11415  let Predicates = [prd] in
11416    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11417                              EVEX_V512;
11418
11419  let Predicates = [prd, HasVLX] in {
11420    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11421                                 EVEX_V256;
11422    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11423                                 EVEX_V128;
11424  }
11425}
11426
// Dword/qword pair of a unary instruction, both with broadcast forms:
//   Q - mnemonic OpcodeStr + "q", i64 vectors, opcode opc_q, VEX_W set;
//   D - mnemonic OpcodeStr + "d", i32 vectors, opcode opc_d.
11427multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11428                                 SDNode OpNode, X86SchedWriteWidths sched,
11429                                 Predicate prd> {
11430  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11431                               avx512vl_i64_info, prd>, VEX_W;
11432  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11433                               avx512vl_i32_info, prd>;
11434}
11435
// Byte/word pair of a unary instruction, without broadcast forms (built on
// avx512_unary_rm_vl):
//   W - mnemonic OpcodeStr + "w", i16 vectors, opcode opc_w;
//   B - mnemonic OpcodeStr + "b", i8 vectors, opcode opc_b.
// Both are marked VEX_WIG.
11436multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11437                                 SDNode OpNode, X86SchedWriteWidths sched,
11438                                 Predicate prd> {
11439  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11440                              avx512vl_i16_info, prd>, VEX_WIG;
11441  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11442                              avx512vl_i8_info, prd>, VEX_WIG;
11443}
11444
// All four element sizes of a unary instruction: the d/q forms are gated on
// HasAVX512 and the b/w forms on HasBWI.
11445multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11446                                  bits<8> opc_d, bits<8> opc_q,
11447                                  string OpcodeStr, SDNode OpNode,
11448                                  X86SchedWriteWidths sched> {
11449  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11450                                    HasAVX512>,
11451              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11452                                    HasBWI>;
11453}
11454
// VPABSB/W/D/Q: opcodes 0x1C (b), 0x1D (w), 0x1E (d), 0x1F (q), selected for
// the generic `abs` node.
11455defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11456                                    SchedWriteVecALU>;
11457
11458// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the 64-bit-element types (v4i64, v2i64) are handled here. The source is
// inserted into an otherwise undefined v8i64 (IMPLICIT_DEF), VPABSQZrr runs on
// the full register, and the low ymm/xmm subregister is extracted as the
// result.
11459let Predicates = [HasAVX512, NoVLX] in {
11460  def : Pat<(v4i64 (abs VR256X:$src)),
11461            (EXTRACT_SUBREG
11462                (VPABSQZrr
11463                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11464             sub_ymm)>;
11465  def : Pat<(v2i64 (abs VR128X:$src)),
11466            (EXTRACT_SUBREG
11467                (VPABSQZrr
11468                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11469             sub_xmm)>;
11470}
11471
11472// Use 512bit version to implement 128/256 bit.
// Active when `prd` holds and VLX is absent (NoVLX). For the 256-bit and
// 128-bit types of `_`, the source is inserted into an undefined 512-bit
// value, the instruction named InstrStr + "Zrr" is applied, and the result is
// extracted through the same SubRegIdx.
11473multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11474                                 AVX512VLVectorVTInfo _, Predicate prd> {
11475  let Predicates = [prd, NoVLX] in {
11476    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11477              (EXTRACT_SUBREG
11478                (!cast<Instruction>(InstrStr # "Zrr")
11479                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11480                                 _.info256.RC:$src1,
11481                                 _.info256.SubRegIdx)),
11482              _.info256.SubRegIdx)>;
11483
11484    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11485              (EXTRACT_SUBREG
11486                (!cast<Instruction>(InstrStr # "Zrr")
11487                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11488                                 _.info128.RC:$src1,
11489                                 _.info128.SubRegIdx)),
11490              _.info128.SubRegIdx)>;
11491  }
11492}
11493
// VPLZCNTD/Q: the d and q forms share opcode 0x44; selected for `ctlz`, gated
// on HasCDI, scheduled as SchedWriteVecIMul.
11494defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11495                                        SchedWriteVecIMul, HasCDI>;
11496
11497// FIXME: Is there a better scheduler class for VPCONFLICT?
// VPCONFLICTD/Q: the d and q forms share opcode 0xC4; selected for
// X86Conflict, gated on HasCDI.
11498defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11499                                        SchedWriteVecALU, HasCDI>;
11500
11501// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11502defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11503defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11504
11505//===---------------------------------------------------------------------===//
11506// Counts number of ones - VPOPCNTD and VPOPCNTQ
11507//===---------------------------------------------------------------------===//
11508
11509// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// The d and q forms share opcode 0x55; selected for `ctpop`, gated on
// HasVPOPCNTDQ.
11510defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11511                                     SchedWriteVecALU, HasVPOPCNTDQ>;
11512
// Without VLX, the 128/256-bit ctpop is widened to the 512-bit instruction.
11513defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11514defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11515
11516//===---------------------------------------------------------------------===//
11517// Replicate Single FP - MOVSHDUP and MOVSLDUP
11518//===---------------------------------------------------------------------===//
11519
// Thin wrapper: a unary rr/rm instruction on f32 vectors at all three widths,
// gated on HasAVX512, with the XS prefix class applied.
11520multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11521                            X86SchedWriteWidths sched> {
11522  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11523                                      avx512vl_f32_info, HasAVX512>, XS;
11524}
11525
// VMOVSHDUP uses opcode 0x16 (X86Movshdup); VMOVSLDUP uses opcode 0x12
// (X86Movsldup).
11526defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11527                                  SchedWriteFShuffle>;
11528defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11529                                  SchedWriteFShuffle>;
11530
11531//===----------------------------------------------------------------------===//
11532// AVX-512 - MOVDDUP
11533//===----------------------------------------------------------------------===//
11534
// 128-bit MOVDDUP forms, expressed as broadcasts rather than X86Movddup:
//   rr - X86VBroadcast of the source vector register;
//   rm - _.BroadcastLdFrag from a scalar memory operand, with
//        EVEX_CD8<_.EltSize, CD8VH>.
11535multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11536                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11537  let ExeDomain = _.ExeDomain in {
11538  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11539                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
11540                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11541                   Sched<[sched]>;
11542  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11543                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11544                 (_.VT (_.BroadcastLdFrag addr:$src))>,
11545                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11546                 Sched<[sched.Folded]>;
11547  }
11548}
11549
// MOVDDUP at all three widths. Z and Z256 reuse avx512_unary_rm with the
// X86Movddup node; Z128 uses the broadcast-based avx512_movddup_128 instead.
// Z has no explicit `let Predicates` in this multiclass; Z256/Z128 require
// HasAVX512 and HasVLX.
11550multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11551                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11552  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11553                           VTInfo.info512>, EVEX_V512;
11554
11555  let Predicates = [HasAVX512, HasVLX] in {
11556    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11557                                VTInfo.info256>, EVEX_V256;
11558    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11559                                   VTInfo.info128>, EVEX_V128;
11560  }
11561}
11562
// Binds avx512_movddup_common to f64 vectors and applies the XD and VEX_W
// classes.
11563multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11564                          X86SchedWriteWidths sched> {
11565  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11566                                        avx512vl_f64_info>, XD, VEX_W;
11567}
11568
// VMOVDDUP: opcode 0x12, scheduled as SchedWriteFShuffle.
11569defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11570
// With VLX, a broadcast of a scalar f64 to v2f64 is selected as VMOVDDUPZ128:
// the FR64X scalar is first copied into the VR128X register class. Covers the
// unmasked (rr), merge-masked (rrk) and zero-masked (rrkz) forms.
11571let Predicates = [HasVLX] in {
11572def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11573          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11574
11575def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11576                        (v2f64 VR128X:$src0)),
11577          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11578                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11579def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11580                        immAllZerosV),
11581          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11582}
11583
11584//===----------------------------------------------------------------------===//
11585// AVX-512 - Unpack Instructions
11586//===----------------------------------------------------------------------===//
11587
// FP unpack high/low. They reuse the FP binop multiclass, so the enclosing
// `let` clears the implicit register uses and mayRaiseFPException on them.
// NOTE(review): VUNPCKH passes two extra trailing arguments (0, 1) that
// VUNPCKL leaves at their defaults; their meaning is defined by
// avx512_fp_binop_p, which is outside this section - confirm there.
11588let Uses = []<Register>, mayRaiseFPException = 0 in {
11589defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11590                                 SchedWriteFShuffleSizes, 0, 1>;
11591defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11592                                 SchedWriteFShuffleSizes>;
11593}
11594
// Integer unpack low/high. The byte and word forms are gated on HasBWI, the
// dword and qword forms on HasAVX512. All use SchedWriteShuffle.
11595defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11596                                       SchedWriteShuffle, HasBWI>;
11597defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11598                                       SchedWriteShuffle, HasBWI>;
11599defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11600                                       SchedWriteShuffle, HasBWI>;
11601defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11602                                       SchedWriteShuffle, HasBWI>;
11603
11604defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11605                                       SchedWriteShuffle, HasAVX512>;
11606defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11607                                       SchedWriteShuffle, HasAVX512>;
11608defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11609                                        SchedWriteShuffle, HasAVX512>;
11610defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11611                                        SchedWriteShuffle, HasAVX512>;
11612
11613//===----------------------------------------------------------------------===//
11614// AVX-512 - Extract & Insert Integer Instructions
11615//===----------------------------------------------------------------------===//
11616
// Memory-destination form (mr) shared by the byte and word extracts: the
// OpNode result is truncated to _.EltVT and stored to $dst. Uses
// EVEX_CD8<_.EltSize, CD8VT1> and the WriteVecExtractSt scheduling class.
11617multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11618                                                            X86VectorVTInfo _> {
11619  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11620              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11621              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11622              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11623                       addr:$dst)]>,
11624              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11625}
11626
// Byte extract (X86pextrb), gated on HasBWI: a register form (rr, opcode 0x14,
// MRMDestReg, result in GR32orGR64) plus the shared memory form with the same
// opcode. Both use TAPD.
11627multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11628  let Predicates = [HasBWI] in {
11629    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11630                  (ins _.RC:$src1, u8imm:$src2),
11631                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11632                  [(set GR32orGR64:$dst,
11633                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11634                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11635
11636    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11637  }
11638}
11639
// Word extract (X86pextrw), gated on HasBWI. Three definitions:
//   rr     - opcode 0xC5, MRMSrcReg, PD; carries the selection pattern.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD; no pattern, isCodeGenOnly with
//            ForceDisassemble, linked to rr through FoldGenData<NAME#rr>.
//   mr     - shared memory form, opcode 0x15, TAPD.
11640multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11641  let Predicates = [HasBWI] in {
11642    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11643                  (ins _.RC:$src1, u8imm:$src2),
11644                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11645                  [(set GR32orGR64:$dst,
11646                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11647                  EVEX, PD, Sched<[WriteVecExtract]>;
11648
11649    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11650    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11651                   (ins _.RC:$src1, u8imm:$src2),
11652                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11653                   EVEX, TAPD, FoldGenData<NAME#rr>,
11654                   Sched<[WriteVecExtract]>;
11655
11656    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11657  }
11658}
11659
// Dword/qword extract, gated on HasDQI. Both forms use opcode 0x16 and match
// the generic `extractelt` node with an `imm` index (not `timm` as in the
// byte/word extracts): rr writes the general-purpose class GRC, mr stores the
// extracted element directly to memory.
11660multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11661                                                            RegisterClass GRC> {
11662  let Predicates = [HasDQI] in {
11663    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11664                  (ins _.RC:$src1, u8imm:$src2),
11665                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11666                  [(set GRC:$dst,
11667                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11668                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11669
11670    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11671                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11672                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11673                [(store (extractelt (_.VT _.RC:$src1),
11674                                    imm:$src2),addr:$dst)]>,
11675                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11676                Sched<[WriteVecExtractSt]>;
11677  }
11678}
11679
// EVEX element extracts from 128-bit vectors. The byte/word forms are VEX_WIG;
// the qword form sets VEX_W and targets GR64, the dword form targets GR32.
11680defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
11681defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
11682defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11683defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
11684
// Memory-source form (rm) shared by the element inserts: the inserted value is
// loaded with LdFrag from a scalar memory operand. `immoperator` selects which
// immediate operator (imm or timm) the index is matched with.
11685multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11686                                            X86VectorVTInfo _, PatFrag LdFrag,
11687                                            SDPatternOperator immoperator> {
11688  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11689      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11690      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11691      [(set _.RC:$dst,
11692          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11693      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11694}
11695
// Byte/word insert, gated on HasBWI: a register form (rr) taking the value
// from GR32orGR64, plus the shared memory form. Both match the index with
// `timm`.
11696multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11697                                            X86VectorVTInfo _, PatFrag LdFrag> {
11698  let Predicates = [HasBWI] in {
11699    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11700        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11701        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11702        [(set _.RC:$dst,
11703            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11704        Sched<[WriteVecInsert]>;
11705
11706    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11707  }
11708}
11709
// Dword/qword insert, gated on HasDQI: matches the generic `insertelt` node
// with an `imm` index. rr takes the value from GRC; the memory form loads it
// with _.ScalarLdFrag. Both use TAPD.
11710multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11711                                         X86VectorVTInfo _, RegisterClass GRC> {
11712  let Predicates = [HasDQI] in {
11713    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11714        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11715        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11716        [(set _.RC:$dst,
11717            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11718        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11719
11720    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11721                                    _.ScalarLdFrag, imm>, TAPD;
11722  }
11723}
11724
// EVEX element inserts into 128-bit vectors. The byte form (0x20, TAPD) and
// word form (0xC4, PD) load with extloadi8 / extloadi16 and are VEX_WIG. The
// dword and qword forms share opcode 0x22; the qword form sets VEX_W.
11725defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11726                                     extloadi8>, TAPD, VEX_WIG;
11727defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11728                                     extloadi16>, PD, VEX_WIG;
11729defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11730defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11731
// Without BWI: inserting a v8i1 mask (bitcast to i8 and any-extended to i32)
// as a byte selects VPINSRBrr, with the VK8 mask register copied into GR32.
11732let Predicates = [HasAVX512, NoBWI] in {
11733  def : Pat<(X86pinsrb VR128:$src1,
11734                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11735                       timm:$src3),
11736            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11737                       timm:$src3)>;
11738}
11739
// With BWI, both patterns select VPINSRBZrr:
//  - an any-extended GR8 value is placed into the sub_8bit subregister of an
//    undefined i32 (IMPLICIT_DEF);
//  - a v8i1 mask bitcast to i8 is passed by copying the VK8 register to GR32.
11740let Predicates = [HasBWI] in {
11741  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11742            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11743                        GR8:$src2, sub_8bit), timm:$src3)>;
11744  def : Pat<(X86pinsrb VR128:$src1,
11745                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11746                       timm:$src3),
11747            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11748                        timm:$src3)>;
11749}
11750
11751// Always select FP16 instructions if available.
// The patterns below implement f16 load, store and i16<->f16 bitcast with
// VPINSRW/VPEXTRW at element index 0. They are given AddedComplexity = -10,
// in line with the intent stated above that other f16 patterns take
// precedence.
// NOTE(review): the store pattern uses FR16/VR128 whereas the other three use
// FR16X/VR128X - confirm whether the non-X classes are intentional here.
11752let Predicates = [HasBWI], AddedComplexity = -10 in {
11753  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11754  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11755  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11756  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11757}
11758
11759//===----------------------------------------------------------------------===//
11760// VSHUFPS - VSHUFPD Operations
11761//===----------------------------------------------------------------------===//
11762
// FP shuffle with an 8-bit immediate: opcode 0xC6, X86Shufp node,
// SchedWriteFShuffle. The EVEX_CD8 element size is taken from the 512-bit
// entry of VTInfo_FP.
11763multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11764  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11765                                    SchedWriteFShuffle>,
11766                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11767                                    AVX512AIi8Base, EVEX_4V;
11768}
11769
// VSHUFPS: f32 vectors, PS prefix class. VSHUFPD: f64 vectors, PD prefix class
// plus VEX_W.
11770defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
11771defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
11772
11773//===----------------------------------------------------------------------===//
11774// AVX-512 - Byte shift Left/Right
11775//===----------------------------------------------------------------------===//
11776
// Unmasked shift-by-immediate pair defined directly on the AVX512 class (not
// through AVX512_maskable):
//   ri - register source, ModRM format MRMr;
//   mi - memory source loaded with _.LdFrag, ModRM format MRMm.
11777multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11778                               Format MRMm, string OpcodeStr,
11779                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11780  def ri : AVX512<opc, MRMr,
11781             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11782             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11783             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11784             Sched<[sched]>;
11785  def mi : AVX512<opc, MRMm,
11786           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11787           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11788           [(set _.RC:$dst,(_.VT (OpNode
11789                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11790                                 (i8 timm:$src2))))]>,
11791           Sched<[sched.Folded, sched.ReadAfterFold]>;
11792}
11793
// Instantiates avx512_shift_packed at all three vector widths using the
// byte-element type infos (v64i8, v32i8x, v16i8x).
//   Z     (512-bit) requires `prd`.
//   Z256 / Z128     require `prd` and HasVLX.
// Each width picks the matching member of `sched` (ZMM / YMM / XMM).
11794multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11795                                   Format MRMm, string OpcodeStr,
11796                                   X86SchedWriteWidths sched, Predicate prd>{
11797  let Predicates = [prd] in
11798    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11799                                 sched.ZMM, v64i8_info>, EVEX_V512;
11800  let Predicates = [prd, HasVLX] in {
11801    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11802                                    sched.YMM, v32i8x_info>, EVEX_V256;
11803    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11804                                    sched.XMM, v16i8x_info>, EVEX_V128;
11805  }
11806}
// Left (X86vshldq) and right (X86vshrdq) byte shifts, both gated on HasBWI.
// The two share opcode 0x73 and differ in their encoding formats:
// MRM7r/MRM7m for VPSLLDQ, MRM3r/MRM3m for VPSRLDQ.
11807defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11808                                       SchedWriteShuffle, HasBWI>,
11809                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11810defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11811                                       SchedWriteShuffle, HasBWI>,
11812                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11813
// Two-source instruction pair whose result type differs from its source type.
//   _dst - type info for the destination register class and result VT.
//   _src - type info for both source operands.
// `rr` takes both sources from registers and is marked commutable; `rm` takes
// the second source from memory (loaded via _src.LdFrag and bitconverted to
// _src.VT) and uses the Folded scheduling variant.
11814multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11815                                string OpcodeStr, X86FoldableSchedWrite sched,
11816                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11817  let isCommutable = 1 in
11818  def rr : AVX512BI<opc, MRMSrcReg,
11819             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11820             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11821             [(set _dst.RC:$dst,(_dst.VT
11822                                (OpNode (_src.VT _src.RC:$src1),
11823                                        (_src.VT _src.RC:$src2))))]>,
11824             Sched<[sched]>;
11825  def rm : AVX512BI<opc, MRMSrcMem,
11826           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11827           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11828           [(set _dst.RC:$dst,(_dst.VT
11829                              (OpNode (_src.VT _src.RC:$src1),
11830                              (_src.VT (bitconvert
11831                                        (_src.LdFrag addr:$src2))))))]>,
11832           Sched<[sched.Folded, sched.ReadAfterFold]>;
11833}
11834
// Instantiates avx512_psadbw_packed at all three vector widths. Sources are
// byte vectors (v64i8 / v32i8x / v16i8x) and results are 64-bit-element
// vectors of the same total width (v8i64 / v4i64x / v2i64x).
//   Z     (512-bit) requires `prd`.
//   Z256 / Z128     require `prd` and HasVLX.
11835multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11836                                    string OpcodeStr, X86SchedWriteWidths sched,
11837                                    Predicate prd> {
11838  let Predicates = [prd] in
11839    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11840                                  v8i64_info, v64i8_info>, EVEX_V512;
11841  let Predicates = [prd, HasVLX] in {
11842    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11843                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11844    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11845                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11846  }
11847}
11848
// EVEX VPSADBW: opcode 0xf6, X86psadbw node, gated on HasBWI.
11849defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11850                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11851
11852// Transforms to swizzle an immediate to enable better matching when
11853// memory operand isn't in the right place.
// Immediate transform used when the node's operands appear in reverse order
// relative to the instruction. Bit i of the immediate is the result for the
// operand-bit triple whose binary encoding is i (operand 0 is the high bit,
// operand 2 the low bit), so exchanging operands 0 and 2 exchanges table
// entries 1<->4 and 3<->6; entries 0, 2, 5 and 7 (mask 0xa5) stay in place.
11854def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11855  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11856  uint8_t Imm = N->getZExtValue();
11857  // Swap bits 1/4 and 3/6.
11858  uint8_t NewImm = Imm & 0xa5;
11859  if (Imm & 0x02) NewImm |= 0x10;
11860  if (Imm & 0x10) NewImm |= 0x02;
11861  if (Imm & 0x08) NewImm |= 0x40;
11862  if (Imm & 0x40) NewImm |= 0x08;
11863  return getI8Imm(NewImm, SDLoc(N));
11864}]>;
// Immediate transform for exchanging the first two operands. With operand 0
// as the high bit and operand 2 as the low bit of the table index, swapping
// operands 0 and 1 exchanges entries 2<->4 and 3<->5; entries 0, 1, 6 and 7
// (mask 0xc3) stay in place.
11865def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11866  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11867  uint8_t Imm = N->getZExtValue();
11868  // Swap bits 2/4 and 3/5.
11869  uint8_t NewImm = Imm & 0xc3;
11870  if (Imm & 0x04) NewImm |= 0x10;
11871  if (Imm & 0x10) NewImm |= 0x04;
11872  if (Imm & 0x08) NewImm |= 0x20;
11873  if (Imm & 0x20) NewImm |= 0x08;
11874  return getI8Imm(NewImm, SDLoc(N));
11875}]>;
// Immediate transform for exchanging the last two operands. With operand 0
// as the high bit and operand 2 as the low bit of the table index, swapping
// operands 1 and 2 exchanges entries 1<->2 and 5<->6; entries 0, 3, 4 and 7
// (mask 0x99) stay in place.
11876def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11877  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11878  uint8_t Imm = N->getZExtValue();
11879  // Swap bits 1/2 and 5/6.
11880  uint8_t NewImm = Imm & 0x99;
11881  if (Imm & 0x02) NewImm |= 0x04;
11882  if (Imm & 0x04) NewImm |= 0x02;
11883  if (Imm & 0x20) NewImm |= 0x40;
11884  if (Imm & 0x40) NewImm |= 0x20;
11885  return getI8Imm(NewImm, SDLoc(N));
11886}]>;
// Immediate transform for a three-way rotation of the operands. Operands are
// numbered from 0, with operand 0 the high bit and operand 2 the low bit of
// the table index. The produced immediate is the one for the operand order
// (old operand 1, old operand 2, old operand 0): each index bit moves one
// position up and the top bit wraps to the bottom. Entries 0 and 7 (mask
// 0x81) are unaffected by any permutation and stay in place.
11887def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11888  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11889  uint8_t Imm = N->getZExtValue();
11890  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11891  uint8_t NewImm = Imm & 0x81;
11892  if (Imm & 0x02) NewImm |= 0x04;
11893  if (Imm & 0x04) NewImm |= 0x10;
11894  if (Imm & 0x08) NewImm |= 0x40;
11895  if (Imm & 0x10) NewImm |= 0x02;
11896  if (Imm & 0x20) NewImm |= 0x08;
11897  if (Imm & 0x40) NewImm |= 0x20;
11898  return getI8Imm(NewImm, SDLoc(N));
11899}]>;
// Immediate transform for the opposite three-way rotation. Operands are
// numbered from 0, with operand 0 the high bit and operand 2 the low bit of
// the table index. The produced immediate is the one for the operand order
// (old operand 2, old operand 0, old operand 1): each index bit moves one
// position down and the bottom bit wraps to the top. Entries 0 and 7 (mask
// 0x81) are unaffected by any permutation and stay in place.
11900def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11901  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11902  uint8_t Imm = N->getZExtValue();
11903  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11904  uint8_t NewImm = Imm & 0x81;
11905  if (Imm & 0x02) NewImm |= 0x10;
11906  if (Imm & 0x04) NewImm |= 0x02;
11907  if (Imm & 0x08) NewImm |= 0x20;
11908  if (Imm & 0x10) NewImm |= 0x04;
11909  if (Imm & 0x20) NewImm |= 0x40;
11910  if (Imm & 0x40) NewImm |= 0x08;
11911  return getI8Imm(NewImm, SDLoc(N));
11912}]>;
11913
// VPTERNLOG instruction forms and commuted-operand patterns for one vector
// type `_`.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SelectionDAG node; its operands are (op0, op1, op2, imm8).
//   sched     - scheduling class; memory forms use its Folded variant.
//   Name      - base record name used to look up the generated masked
//               instructions (Name # _.ZSuffix # "rrik", "rmik", ...).
//
// The instructions tie $src1 to $dst and compute OpNode($src1, $src2, $src3,
// $src4): `rri` takes $src3 from a register, `rmi` from a full-width load and
// `rmbi` from a broadcast load.
//
// The extra patterns below match a masked node whose operands are in some
// other order (the passthru, and for the memory forms the load, are not where
// the instruction needs them). They select the same instruction and rewrite
// the immediate so the result is unchanged. Bit i of the immediate is the
// result for operand bits (op0, op1, op2) equal to the binary digits of i,
// so permuting operands permutes the bits of the immediate. Each
// VPTERNLOGxyz_imm8 transform yields the immediate for the operand order
// (node operand x, node operand y, node operand z), counting from 1.
11914multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11915                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11916                          string Name>{
11917  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11918  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11919                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11920                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11921                      (OpNode (_.VT _.RC:$src1),
11922                              (_.VT _.RC:$src2),
11923                              (_.VT _.RC:$src3),
11924                              (i8 timm:$src4)), 1, 1>,
11925                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11926  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11927                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11928                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11929                    (OpNode (_.VT _.RC:$src1),
11930                            (_.VT _.RC:$src2),
11931                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11932                            (i8 timm:$src4)), 1, 0>,
11933                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11934                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11935  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11936                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11937                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11938                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11939                    (OpNode (_.VT _.RC:$src1),
11940                            (_.VT _.RC:$src2),
11941                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11942                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11943                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11944                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11945  }// Constraints = "$src1 = $dst"
11946
11947  // Additional patterns for matching passthru operand in other positions.
11948  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11949                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11950                   _.RC:$src1)),
11951            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11952             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11953  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11954                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11955                   _.RC:$src1)),
11956            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11957             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11958
11959  // Additional patterns for matching zero masking with loads in other
11960  // positions.
11961  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11962                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11963                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11964                   _.ImmAllZerosV)),
11965            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11966             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11967  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11968                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11969                    _.RC:$src2, (i8 timm:$src4)),
11970                   _.ImmAllZerosV)),
11971            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11972             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11973
11974  // Additional patterns for matching masked loads with different
11975  // operand orders.
11976  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11977                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11978                    _.RC:$src2, (i8 timm:$src4)),
11979                   _.RC:$src1)),
11980            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11981             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11982  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11983                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11984                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11985                   _.RC:$src1)),
11986            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11987             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11988  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11989                   (OpNode _.RC:$src2, _.RC:$src1,
11990                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11991                   _.RC:$src1)),
11992            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11993             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Node order is (src2, mem, src1). The instruction order (src1, src2, mem)
  // is node operands (3, 1, 2), i.e. the last node operand moved to the
  // front, so the immediate needs the 312 transform.
11994  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11995                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11996                    _.RC:$src1, (i8 timm:$src4)),
11997                   _.RC:$src1)),
11998            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11999             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // Node order is (mem, src1, src2). The instruction order (src1, src2, mem)
  // is node operands (2, 3, 1), i.e. the first node operand moved to the
  // end, so the immediate needs the 231 transform.
12000  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12001                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
12002                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
12003                   _.RC:$src1)),
12004            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
12005             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
12006
12007  // Additional patterns for matching zero masking with broadcasts in other
12008  // positions.
12009  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12010                   (OpNode (_.BroadcastLdFrag addr:$src3),
12011                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
12012                   _.ImmAllZerosV)),
12013            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12014             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12015             (VPTERNLOG321_imm8 timm:$src4))>;
12016  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12017                   (OpNode _.RC:$src1,
12018                    (_.BroadcastLdFrag addr:$src3),
12019                    _.RC:$src2, (i8 timm:$src4)),
12020                   _.ImmAllZerosV)),
12021            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12022             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12023             (VPTERNLOG132_imm8 timm:$src4))>;
12024
12025  // Additional patterns for matching masked broadcasts with different
12026  // operand orders.
12027  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12028                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
12029                    _.RC:$src2, (i8 timm:$src4)),
12030                   _.RC:$src1)),
12031            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12032             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
12033  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12034                   (OpNode (_.BroadcastLdFrag addr:$src3),
12035                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
12036                   _.RC:$src1)),
12037            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12038             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
12039  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12040                   (OpNode _.RC:$src2, _.RC:$src1,
12041                    (_.BroadcastLdFrag addr:$src3),
12042                    (i8 timm:$src4)), _.RC:$src1)),
12043            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12044             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Node order is (src2, bcst, src1): last node operand moved to the front,
  // so the immediate needs the 312 transform.
12045  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12046                   (OpNode _.RC:$src2,
12047                    (_.BroadcastLdFrag addr:$src3),
12048                    _.RC:$src1, (i8 timm:$src4)),
12049                   _.RC:$src1)),
12050            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12051             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
  // Node order is (bcst, src1, src2): first node operand moved to the end,
  // so the immediate needs the 231 transform.
12052  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
12053                   (OpNode (_.BroadcastLdFrag addr:$src3),
12054                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
12055                   _.RC:$src1)),
12056            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12057             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
12058}
12059
// Instantiates avx512_ternlog (opcode 0x25, X86vpternlog node) at all three
// vector widths of the type family `_`.
//   Z           (512-bit) requires HasAVX512.
//   Z128 / Z256           require HasAVX512 and HasVLX.
// NAME is forwarded so the per-width patterns can look up the masked
// instruction records by name.
12060multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
12061                                 AVX512VLVectorVTInfo _> {
12062  let Predicates = [HasAVX512] in
12063    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
12064                               _.info512, NAME>, EVEX_V512;
12065  let Predicates = [HasAVX512, HasVLX] in {
12066    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
12067                               _.info128, NAME>, EVEX_V128;
12068    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
12069                               _.info256, NAME>, EVEX_V256;
12070  }
12071}
12072
// 32-bit-element and 64-bit-element instantiations; the 64-bit form
// additionally carries VEX_W.
12073defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
12074                                        avx512vl_i32_info>;
12075defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
12076                                        avx512vl_i64_info>, VEX_W;
12077
12078// Patterns to implement vnot using vpternlog instead of creating all ones
12079// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
12080// so that the result is only dependent on src0. But we use the same source
12081// for all operands to prevent a false dependency.
12082// TODO: We should maybe have a more generalized algorithm for folding to
12083// vpternlog.
// 512-bit vnot: every integer element type selects the same VPTERNLOGQZrri
// with immediate 15 and $src in all three source positions.
12084let Predicates = [HasAVX512] in {
12085  def : Pat<(v64i8 (vnot VR512:$src)),
12086            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12087  def : Pat<(v32i16 (vnot VR512:$src)),
12088            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12089  def : Pat<(v16i32 (vnot VR512:$src)),
12090            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12091  def : Pat<(v8i64 (vnot VR512:$src)),
12092            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12093}
12094
// 128-bit and 256-bit vnot when VLX is not available. The source is placed
// in the low subregister (sub_xmm / sub_ymm) of an otherwise undefined
// 512-bit value, the 512-bit VPTERNLOGQZrri is applied with immediate 15,
// and the same subregister is extracted from the result.
12095let Predicates = [HasAVX512, NoVLX] in {
12096  def : Pat<(v16i8 (vnot VR128X:$src)),
12097            (EXTRACT_SUBREG
12098             (VPTERNLOGQZrri
12099              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12100              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12101              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12102              (i8 15)), sub_xmm)>;
12103  def : Pat<(v8i16 (vnot VR128X:$src)),
12104            (EXTRACT_SUBREG
12105             (VPTERNLOGQZrri
12106              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12107              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12108              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12109              (i8 15)), sub_xmm)>;
12110  def : Pat<(v4i32 (vnot VR128X:$src)),
12111            (EXTRACT_SUBREG
12112             (VPTERNLOGQZrri
12113              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12114              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12115              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12116              (i8 15)), sub_xmm)>;
12117  def : Pat<(v2i64 (vnot VR128X:$src)),
12118            (EXTRACT_SUBREG
12119             (VPTERNLOGQZrri
12120              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12121              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12122              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12123              (i8 15)), sub_xmm)>;
12124
12125  def : Pat<(v32i8 (vnot VR256X:$src)),
12126            (EXTRACT_SUBREG
12127             (VPTERNLOGQZrri
12128              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12129              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12130              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12131              (i8 15)), sub_ymm)>;
12132  def : Pat<(v16i16 (vnot VR256X:$src)),
12133            (EXTRACT_SUBREG
12134             (VPTERNLOGQZrri
12135              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12136              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12137              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12138              (i8 15)), sub_ymm)>;
12139  def : Pat<(v8i32 (vnot VR256X:$src)),
12140            (EXTRACT_SUBREG
12141             (VPTERNLOGQZrri
12142              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12143              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12144              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12145              (i8 15)), sub_ymm)>;
12146  def : Pat<(v4i64 (vnot VR256X:$src)),
12147            (EXTRACT_SUBREG
12148             (VPTERNLOGQZrri
12149              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12150              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12151              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12152              (i8 15)), sub_ymm)>;
12153}
12154
// 128-bit and 256-bit vnot when VLX is available: select the native-width
// VPTERNLOGQZ128rri / VPTERNLOGQZ256rri directly, with immediate 15 and $src
// in all three source positions, for every integer element type.
12155let Predicates = [HasVLX] in {
12156  def : Pat<(v16i8 (vnot VR128X:$src)),
12157            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12158  def : Pat<(v8i16 (vnot VR128X:$src)),
12159            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12160  def : Pat<(v4i32 (vnot VR128X:$src)),
12161            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12162  def : Pat<(v2i64 (vnot VR128X:$src)),
12163            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12164
12165  def : Pat<(v32i8 (vnot VR256X:$src)),
12166            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12167  def : Pat<(v16i16 (vnot VR256X:$src)),
12168            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12169  def : Pat<(v8i32 (vnot VR256X:$src)),
12170            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12171  def : Pat<(v4i64 (vnot VR256X:$src)),
12172            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12173}
12174
12175//===----------------------------------------------------------------------===//
12176// AVX-512 - FixupImm
12177//===----------------------------------------------------------------------===//
12178
// Packed VFIXUPIMM forms for one vector type.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   sched     - scheduling class; memory forms use its Folded variant.
//   _         - type info for $dst/$src1 (tied) and $src2.
//   TblVT     - type info used for the VT of $src3 in the patterns.
// All three forms tie $src1 to $dst, read MXCSR and may raise FP exceptions.
// `rri` takes $src3 from a register, `rmi` from a full-width load and `rmbi`
// from a broadcast load (EVEX_B). $src4 is the immediate, matched as i32.
12179multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12180                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12181                                  X86VectorVTInfo TblVT>{
12182  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12183      Uses = [MXCSR], mayRaiseFPException = 1 in {
12184    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12185                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12186                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12187                        (X86VFixupimm (_.VT _.RC:$src1),
12188                                      (_.VT _.RC:$src2),
12189                                      (TblVT.VT _.RC:$src3),
12190                                      (i32 timm:$src4))>, Sched<[sched]>;
12191    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12192                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12193                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12194                      (X86VFixupimm (_.VT _.RC:$src1),
12195                                    (_.VT _.RC:$src2),
12196                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12197                                    (i32 timm:$src4))>,
12198                      Sched<[sched.Folded, sched.ReadAfterFold]>;
12199    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12200                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12201                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12202                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
12203                      (X86VFixupimm (_.VT _.RC:$src1),
12204                                    (_.VT _.RC:$src2),
12205                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12206                                    (i32 timm:$src4))>,
12207                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12208  } // Constraints = "$src1 = $dst"
12209}
12210
// Everything from avx512_fixupimm_packed plus a register-only `rrib` form
// printed with "{sae}" and matched against X86VFixupimmSAE. Unlike the
// inherited forms, `rrib` reads MXCSR but is not marked mayRaiseFPException.
12211multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12212                                      X86FoldableSchedWrite sched,
12213                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
12214  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12215let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12216  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12217                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12218                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12219                      "$src2, $src3, {sae}, $src4",
12220                      (X86VFixupimmSAE (_.VT _.RC:$src1),
12221                                       (_.VT _.RC:$src2),
12222                                       (TblVT.VT _.RC:$src3),
12223                                       (i32 timm:$src4))>,
12224                      EVEX_B, Sched<[sched]>;
12225  }
12226}
12227
// Scalar VFIXUPIMM forms, gated on HasAVX512.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   sched     - scheduling class; only the memory form uses its Folded
//               variant.
//   _         - type info for $dst/$src1 (tied) and $src2.
//   _src3VT   - type info used for $src3 in the patterns.
// `rri`  - register form (SIMD_EXC).
// `rrib` - register form printed with "{sae}", matched against
//          X86VFixupimmSAEs; reads MXCSR only.
// `rmi`  - $src3 comes from a scalar load wrapped in scalar_to_vector
//          (SIMD_EXC).
12228multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12229                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12230                                  X86VectorVTInfo _src3VT> {
12231  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12232      ExeDomain = _.ExeDomain in {
12233    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12234                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12235                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12236                      (X86VFixupimms (_.VT _.RC:$src1),
12237                                     (_.VT _.RC:$src2),
12238                                     (_src3VT.VT _src3VT.RC:$src3),
12239                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // rrib has no memory operand, so it uses the plain register scheduling
    // class, like rri above and like the packed {sae} form.
12240    let Uses = [MXCSR] in
12241    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12242                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12243                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12244                      "$src2, $src3, {sae}, $src4",
12245                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
12246                                        (_.VT _.RC:$src2),
12247                                        (_src3VT.VT _src3VT.RC:$src3),
12248                                        (i32 timm:$src4))>,
12249                      EVEX_B, Sched<[sched]>;
12250    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12251                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12252                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12253                     (X86VFixupimms (_.VT _.RC:$src1),
12254                                    (_.VT _.RC:$src2),
12255                                    (_src3VT.VT (scalar_to_vector
12256                                              (_src3VT.ScalarLdFrag addr:$src3))),
12257                                    (i32 timm:$src4))>,
12258                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12259  }
12260}
12261
// Instantiates the packed VFIXUPIMM forms (opcode 0x54) at all three widths.
//   _Vec - type family for the data operands.
//   _Tbl - type family for the $src3 operand.
// Only the 512-bit instantiation uses the _sae variant and so gets the
// extra {sae} form; the 128/256-bit ones additionally require HasVLX.
12262multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12263                                      AVX512VLVectorVTInfo _Vec,
12264                                      AVX512VLVectorVTInfo _Tbl> {
12265  let Predicates = [HasAVX512] in
12266    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12267                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12268                                EVEX_4V, EVEX_V512;
12269  let Predicates = [HasAVX512, HasVLX] in {
12270    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12271                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12272                            EVEX_4V, EVEX_V128;
12273    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12274                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12275                            EVEX_4V, EVEX_V256;
12276  }
12277}
12278
// Scalar (opcode 0x55) and packed VFIXUPIMM instantiations. Each FP type is
// paired with the integer type of the same element width for $src3
// (f32 with i32, f64 with i64); the 64-bit forms additionally carry VEX_W.
12279defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12280                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12281                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12282defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12283                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12284                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
12285defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12286                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12287defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12288                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
12289
12290// Patterns used to select SSE scalar fp arithmetic instructions from
12291// either:
12292//
12293// (1) a scalar fp operation followed by a blend
12294//
12295// The effect is that the backend no longer emits unnecessary vector
12296// insert instructions immediately after SSE scalar fp instructions
12297// like addss or mulss.
12298//
12299// For example, given the following code:
12300//   __m128 foo(__m128 A, __m128 B) {
12301//     A[0] += B[0];
12302//     return A;
12303//   }
12304//
12305// Previously we generated:
12306//   addss %xmm0, %xmm1
12307//   movss %xmm1, %xmm0
12308//
12309// We now generate:
12310//   addss %xmm1, %xmm0
12311//
12312// (2) a vector packed single/double fp operation followed by a vector insert
12313//
12314// The effect is that the backend converts the packed fp instruction
12315// followed by a vector insert into a single SSE scalar fp instruction.
12316//
12317// For example, given the following code:
12318//   __m128 foo(__m128 A, __m128 B) {
12319//     __m128 C = A + B;
12320//     return (__m128) {C[0], A[1], A[2], A[3]};
12321//   }
12322//
12323// Previously we generated:
12324//   addps %xmm0, %xmm1
12325//   movss %xmm1, %xmm0
12326//
12327// We now generate:
12328//   addss %xmm1, %xmm0
12329
12330// TODO: Some canonicalization in lowering would simplify the number of
12331// patterns we have to try to match.
// Selection patterns that fold "extract element 0, scalar op, re-insert via
// MoveNode" into a single "V" # OpcPrefix # "Z..._Int" instruction.
//   Op        - operator matched in the unmasked patterns.
//   MaskedOp  - operator matched under X86selects_mask in the masked
//               patterns.
//   OpcPrefix - instruction name stem placed between "V" and "Zrr_Int" etc.
//   MoveNode  - node that inserts the scalar result back into the vector.
//   _         - vector type info (VT, EltVT, FRC, ScalarLdFrag).
//   ZeroFP    - leaf matching the zero value used by the zero-masked forms.
// Each group has a register variant (second operand in _.FRC, copied to
// VR128X) and a memory variant (second operand from _.ScalarLdFrag).
12332multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12333                                          string OpcPrefix, SDNode MoveNode,
12334                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
12335  let Predicates = [HasAVX512] in {
12336    // extracted scalar math op with insert via movss
12337    def : Pat<(MoveNode
12338               (_.VT VR128X:$dst),
12339               (_.VT (scalar_to_vector
12340                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12341                          _.FRC:$src)))),
12342              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12343               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12344    def : Pat<(MoveNode
12345               (_.VT VR128X:$dst),
12346               (_.VT (scalar_to_vector
12347                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12348                          (_.ScalarLdFrag addr:$src))))),
12349              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12350
12351    // extracted merge-masked scalar math op with insert via movss
12352    def : Pat<(MoveNode (_.VT VR128X:$src1),
12353               (scalar_to_vector
12354                (X86selects_mask VK1WM:$mask,
12355                            (MaskedOp (_.EltVT
12356                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12357                                      _.FRC:$src2),
12358                            _.FRC:$src0))),
12359              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12360               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12361               VK1WM:$mask, _.VT:$src1,
12362               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12363    def : Pat<(MoveNode (_.VT VR128X:$src1),
12364               (scalar_to_vector
12365                (X86selects_mask VK1WM:$mask,
12366                            (MaskedOp (_.EltVT
12367                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12368                                      (_.ScalarLdFrag addr:$src2)),
12369                            _.FRC:$src0))),
12370              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12371               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12372               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12373
12374    // extracted zero-masked scalar math op with insert via movss
12375    def : Pat<(MoveNode (_.VT VR128X:$src1),
12376               (scalar_to_vector
12377                (X86selects_mask VK1WM:$mask,
12378                            (MaskedOp (_.EltVT
12379                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12380                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12381      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12382          VK1WM:$mask, _.VT:$src1,
12383          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12384    def : Pat<(MoveNode (_.VT VR128X:$src1),
12385               (scalar_to_vector
12386                (X86selects_mask VK1WM:$mask,
12387                            (MaskedOp (_.EltVT
12388                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12389                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12390      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12391  }
12392}
12393
// Instantiate the scalar binary-math folding patterns for add/sub/mul/div.
// Each line pairs the any_* operator (unmasked patterns) with the plain
// operator (masked patterns).
// Single-precision: X86Movss on v4f32x_info, zero leaf fp32imm0.
12394defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12395defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12396defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12397defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12398
// Double-precision: X86Movsd on v2f64x_info, zero leaf fp64imm0.
12399defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12400defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12401defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12402defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12403
// Half-precision: X86Movsh on v8f16x_info, zero leaf fp16imm0.
12404defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12405defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12406defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12407defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12408
// Folds "apply OpNode to element 0 of one vector and insert the result into
// another vector via Move" into the "V" # OpcPrefix # "Zr_Int" instruction.
//   OpNode    - unary operator applied to the extracted element.
//   OpcPrefix - opcode stem used to look up the instruction record.
//   Move      - node that inserts the new low element into the first operand.
//   _         - vector type info supplying VT.
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode,
                                             string OpcPrefix, SDNode Move,
                                             X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in
  def : Pat<(_.VT (Move _.VT:$passthru,
                        (scalar_to_vector
                         (OpNode (extractelt _.VT:$vec, 0))))),
            (!cast<Instruction>("V" # OpcPrefix # "Zr_Int")
             _.VT:$passthru, _.VT:$vec)>;
}
12417
// Instantiate the unary folding pattern for any_fsqrt at single, double and
// half precision (SQRTSS / SQRTSD / SQRTSH).
12418defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12419defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12420defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12421
12422//===----------------------------------------------------------------------===//
12423// AES instructions
12424//===----------------------------------------------------------------------===//
12425
// EVEX-encoded AES round instructions built on AESI_binop_rm_int.
//   Op        - opcode byte.
//   OpStr     - assembly mnemonic.
//   IntPrefix - name of the 128-bit intrinsic; the 256- and 512-bit variants
//               are looked up as IntPrefix # "_256" and IntPrefix # "_512".
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  // 128- and 256-bit forms are gated on VLX together with VAES.
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr, !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix # "_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }

  // The 512-bit form is gated on AVX512 together with VAES.
  let Predicates = [HasAVX512, HasVAES] in {
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix # "_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
  }
}
12443
// The four AES round instructions (opcodes 0xDC-0xDF), each bound to the
// int_x86_aesni_* intrinsic family named by the last argument.
12444defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12445defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12446defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12447defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12448
12449//===----------------------------------------------------------------------===//
12450// PCLMUL instructions - Carry less multiplication
12451//===----------------------------------------------------------------------===//
12452
// 512-bit form: gated on AVX512 together with VPCLMULQDQ.
12453let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12454defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12455                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12456
// 128- and 256-bit forms: gated on VLX together with VPCLMULQDQ.
12457let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12458defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12459                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12460
12461defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12462                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12463                                EVEX_CD8<64, CD8VF>, VEX_WIG;
12464}
12465
12466// Aliases
// One vpclmulqdq_aliases instantiation per EVEX record prefix defined above,
// with the matching register class and memory operand.
12467defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12468defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12469defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12470
12471//===----------------------------------------------------------------------===//
12472// VBMI2
12473//===----------------------------------------------------------------------===//
12474
// Register (r) and full-vector memory (m) forms of a three-source VBMI2
// instruction, built with AVX512_maskable_3src.
//   Op     - opcode byte.
//   OpStr  - assembly mnemonic.
//   OpNode - node matched with ($src1, $src2, $src3).
//   sched  - scheduling class; the memory form uses its Folded/ReadAfterFold.
//   VTI    - vector type info (RC, VT, MemOp, LdFrag, ExeDomain).
12475multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12476                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  // $src1 is tied to the destination, so it is absent from the (ins ...) lists.
12477  let Constraints = "$src1 = $dst",
12478      ExeDomain   = VTI.ExeDomain in {
12479    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12480                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12481                "$src3, $src2", "$src2, $src3",
12482                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12483                T8PD, EVEX_4V, Sched<[sched]>;
12484    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12485                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12486                "$src3, $src2", "$src2, $src3",
12487                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12488                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12489                T8PD, EVEX_4V,
12490                Sched<[sched.Folded, sched.ReadAfterFold]>;
12491  }
12492}
12493
// Extends VBMI2_shift_var_rm (r and m forms) with a broadcast-memory form
// (mb): $src3 is a scalar memory operand matched through VTI.BroadcastLdFrag,
// printed with VTI.BroadcastStr and encoded with EVEX_B.
12494multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12495                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12496         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12497  let Constraints = "$src1 = $dst",
12498      ExeDomain   = VTI.ExeDomain in
12499  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12500              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12501              "${src3}"#VTI.BroadcastStr#", $src2",
12502              "$src2, ${src3}"#VTI.BroadcastStr,
12503              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12504               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12505              T8PD, EVEX_4V, EVEX_B,
12506              Sched<[sched.Folded, sched.ReadAfterFold]>;
12507}
12508
// Instantiates VBMI2_shift_var_rm (register and memory forms, no broadcast)
// for each vector width described by VTI, choosing the per-width scheduling
// class from sched.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTI> {
  // info512 variant: requires VBMI2 only.
  let Predicates = [HasVBMI2] in {
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  }

  // info256 / info128 variants: additionally require VLX.
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
12521
// Instantiates VBMI2_shift_var_rmb (register, memory and broadcast forms) for
// each vector width described by VTI, choosing the per-width scheduling class
// from sched.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo VTI> {
  // info512 variant: requires VBMI2 only.
  let Predicates = [HasVBMI2] in {
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
             EVEX_V512;
  }

  // info256 / info128 variants: additionally require VLX.
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
// Word/dword/qword families of a variable-count VBMI2 instruction.
//   wOp    - opcode used for the word (W) family.
//   dqOp   - opcode shared by the dword (D) and qword (Q) families.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended per family.
// The W family uses the non-broadcast multiclass; D and Q use the broadcast
// one. W and Q carry VEX_W.
12534multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12535                           SDNode OpNode, X86SchedWriteWidths sched> {
12536  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12537             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12538  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12539             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12540  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12541             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12542}
12543
// Word/dword/qword families of an immediate-count VBMI2 instruction.
//   wOp    - opcode used for the word (W) family.
//   dqOp   - opcode shared by the dword (D) and qword (Q) families.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended per family.
// W is built from avx512_common_3Op_rm_imm8, D and Q from
// avx512_common_3Op_imm8; note the two helpers take their arguments in
// different orders. All three are gated on HasVBMI2.
12544multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12545                           SDNode OpNode, X86SchedWriteWidths sched> {
12546  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12547             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12548             VEX_W, EVEX_CD8<16, CD8VF>;
12549  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12550             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12551  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12552             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
12553}
12554
12555// Concat & Shift
// The variable-count (…V) and immediate-count records reuse the same opcode
// pairs (0x70/0x71 and 0x72/0x73); the two multiclasses apply different
// encoding mixins.
12556defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12557defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12558defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12559defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12560
12561// Compress
// Byte and word compress share opcode 0x63; the word form adds VEX_W. Both
// are marked NotMemoryFoldable.
12562defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12563                                         avx512vl_i8_info, HasVBMI2>, EVEX,
12564                                         NotMemoryFoldable;
12565defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12566                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12567                                          NotMemoryFoldable;
12568// Expand
// Byte and word expand share opcode 0x62; the word form adds VEX_W.
12569defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12570                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12571defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12572                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12573
12574//===----------------------------------------------------------------------===//
12575// VNNI
12576//===----------------------------------------------------------------------===//
12577
// Register (r), full-vector memory (m) and broadcast-memory (mb) forms of a
// three-source VNNI instruction, built with AVX512_maskable_3src.
//   Op           - opcode byte.
//   OpStr        - assembly mnemonic.
//   OpNode       - node matched with ($src1, $src2, $src3).
//   sched        - scheduling class; memory forms use Folded/ReadAfterFold.
//   VTI          - vector type info.
//   IsCommutable - passed twice to the register form only; the memory forms
//                  leave the corresponding template arguments at their defaults.
// $src1 is tied to the destination for every form.
12578let Constraints = "$src1 = $dst" in
12579multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12580                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12581                    bit IsCommutable> {
12582  let ExeDomain = VTI.ExeDomain in {
12583  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12584                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12585                                   "$src3, $src2", "$src2, $src3",
12586                                   (VTI.VT (OpNode VTI.RC:$src1,
12587                                            VTI.RC:$src2, VTI.RC:$src3)),
12588                                   IsCommutable, IsCommutable>,
12589                                   EVEX_4V, T8PD, Sched<[sched]>;
12590  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12591                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12592                                   "$src3, $src2", "$src2, $src3",
12593                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12594                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12595                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12596                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12597  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12598                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12599                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12600                                   "$src2, ${src3}"#VTI.BroadcastStr,
12601                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12602                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12603                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12604                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12605  }
12606}
12607
// Instantiates VNNI_rmb for the three i32 vector widths, selecting the
// per-width scheduling class from sched and forwarding IsCommutable.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  // v16i32 form: requires VNNI only.
  let Predicates = [HasVNNI] in {
    defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info, IsCommutable>,
             EVEX_V512;
  }

  // v8i32 / v4i32 forms: additionally require VLX.
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                         IsCommutable>,
                EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                         IsCommutable>,
                EVEX_V128;
  }
}
12620
12621// FIXME: Is there a better scheduler class for VPDP?
// The final argument is IsCommutable: 0 for the BUSD forms, 1 for the WSSD
// forms.
12622defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12623defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12624defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12625defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12626
// Select VPDPWSSD for an integer add whose second operand is an
// X86vpmaddwd_su node: the other add operand becomes the accumulator and the
// two multiply inputs become the remaining sources. Each width has a
// register form and a form whose second multiply input is a load.

// 512-bit: requires VNNI only.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$acc, (X86vpmaddwd_su VR512:$lhs, VR512:$rhs))),
            (VPDPWSSDZr VR512:$acc, VR512:$lhs, VR512:$rhs)>;
  def : Pat<(v16i32 (add VR512:$acc,
                         (X86vpmaddwd_su VR512:$lhs, (load addr:$rhs)))),
            (VPDPWSSDZm VR512:$acc, VR512:$lhs, addr:$rhs)>;
}

// 256- and 128-bit: additionally require VLX.
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$acc,
                        (X86vpmaddwd_su VR256X:$lhs, VR256X:$rhs))),
            (VPDPWSSDZ256r VR256X:$acc, VR256X:$lhs, VR256X:$rhs)>;
  def : Pat<(v8i32 (add VR256X:$acc,
                        (X86vpmaddwd_su VR256X:$lhs, (load addr:$rhs)))),
            (VPDPWSSDZ256m VR256X:$acc, VR256X:$lhs, addr:$rhs)>;

  def : Pat<(v4i32 (add VR128X:$acc,
                        (X86vpmaddwd_su VR128X:$lhs, VR128X:$rhs))),
            (VPDPWSSDZ128r VR128X:$acc, VR128X:$lhs, VR128X:$rhs)>;
  def : Pat<(v4i32 (add VR128X:$acc,
                        (X86vpmaddwd_su VR128X:$lhs, (load addr:$rhs)))),
            (VPDPWSSDZ128m VR128X:$acc, VR128X:$lhs, addr:$rhs)>;
}
12650
12651//===----------------------------------------------------------------------===//
12652// Bit Algorithms
12653//===----------------------------------------------------------------------===//
12654
12655// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count: both match ctpop, share opcode 0x54 and are
// gated on HasBITALG; the word form adds VEX_W.
12656defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12657                                   avx512vl_i8_info, HasBITALG>;
12658defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12659                                   avx512vl_i16_info, HasBITALG>, VEX_W;
12660
// Extra ctpop lowering patterns for the same two instructions, produced by
// avx512_unary_lowering (defined elsewhere in this file).
12661defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12662defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12663
// Variant of X86Vpshufbitqmb that only matches when the node has exactly one
// use (the predicate below checks N->hasOneUse()).
12664def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12665                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12666  return N->hasOneUse();
12667}]>;
12668
// Register-register (rr) and register-memory (rm) forms of vpshufbitqmb
// (opcode 0x8F), built with AVX512_maskable_cmp. The result is written to a
// mask register (VTI.KRC). Each form supplies two patterns: one on
// X86Vpshufbitqmb and one on its single-use variant X86Vpshufbitqmb_su.
//   sched - scheduling class; the memory form uses Folded/ReadAfterFold.
//   VTI   - vector type info (RC, KRC, VT, MemOp, LdFrag).
12669multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12670  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12671                                (ins VTI.RC:$src1, VTI.RC:$src2),
12672                                "vpshufbitqmb",
12673                                "$src2, $src1", "$src1, $src2",
12674                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12675                                (VTI.VT VTI.RC:$src2)),
12676                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12677                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12678                                Sched<[sched]>;
12679  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12680                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12681                                "vpshufbitqmb",
12682                                "$src2, $src1", "$src1, $src2",
12683                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12684                                (VTI.VT (VTI.LdFrag addr:$src2))),
12685                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12686                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12687                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12688                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12689}
12690
// Instantiates VPSHUFBITQMB_rm for each vector width described by VTI,
// selecting the per-width scheduling class from sched.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTI> {
  // info512 variant: requires BITALG only.
  let Predicates = [HasBITALG] in {
    defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  }

  // info256 / info128 variants: additionally require VLX.
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}
12699
12700// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Single instantiation over the i8 vector types.
12701defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12702
12703//===----------------------------------------------------------------------===//
12704// GFNI
12705//===----------------------------------------------------------------------===//
12706
// Instantiates avx512_binop_rm over the three i8 vector widths for a GFNI
// binary operation. The trailing 1 passed to avx512_binop_rm is forwarded
// unchanged for every width.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  // v64i8 form: requires GFNI and AVX512.
  let Predicates = [HasGFNI, HasAVX512] in {
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
             EVEX_V512;
  }

  // v32i8 / v16i8 forms: require GFNI and VLX.
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}
12719
// vgf2p8mulb: opcode 0xCF, matched on X86GF2P8mulb.
12720defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12721                                          SchedWriteVecALU>,
12722                                          EVEX_CD8<8, CD8VF>, T8PD;
12723
// Inherits the forms defined by avx512_3Op_rm_imm8 and adds a
// broadcast-memory form (rmbi) with an 8-bit immediate.
//   VTI     - type info for the operation itself (register class, VT).
//   BcstVTI - type info for the broadcast: its VT is the type of the
//             X86VBroadcastld64 node, which is bitconverted before being
//             passed to OpNode, and its BroadcastStr is used in the asm string.
12724multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12725                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12726                                      X86VectorVTInfo BcstVTI>
12727           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12728  let ExeDomain = VTI.ExeDomain in
12729  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12730                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12731                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12732                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12733                (OpNode (VTI.VT VTI.RC:$src1),
12734                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12735                 (i8 timm:$src3))>, EVEX_B,
12736                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12737}
12738
// Instantiates GF2P8AFFINE_avx512_rmb_imm for the three vector widths. Each
// width pairs an i8 vector type (the operation type) with the i64 vector
// type of the same total width (the broadcast type).
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  // v64i8 / v8i64 form: requires GFNI and AVX512.
  let Predicates = [HasGFNI, HasAVX512] in {
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                        v64i8_info, v8i64_info>,
             EVEX_V512;
  }

  // 256- and 128-bit forms: require GFNI and VLX.
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>,
                EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>,
                EVEX_V128;
  }
}
12751
// Affine transforms with an 8-bit immediate: the inverse form uses opcode
// 0xCF, the plain form 0xCE. Both use AVX512AIi8Base with VEX_W.
12752defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12753                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12754                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12755defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12756                         X86GF2P8affineqb, SchedWriteVecIMul>,
12757                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12758
12759
12760//===----------------------------------------------------------------------===//
12761// AVX5124FMAPS
12762//===----------------------------------------------------------------------===//
12763
// Assembler-only definitions: every record below has an empty pattern list
// ([]), so the load, MXCSR use and FP-exception properties are stated
// explicitly in the enclosing let. $src1 is tied to $dst. All four take
// their memory operand as f128mem.
12764let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12765    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
// Packed forms: 512-bit destination, EVEX_CD8<32, CD8VQ>.
12766defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12767                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12768                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12769                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12770                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12771
12772defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12773                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12774                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12775                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12776                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12777
// Scalar forms: 128-bit destination, VEX_LIG, EVEX_CD8<32, CD8VF>.
12778defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12779                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12780                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12781                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12782                    Sched<[SchedWriteFMA.Scl.Folded]>;
12783
12784defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12785                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12786                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12787                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12788                     Sched<[SchedWriteFMA.Scl.Folded]>;
12789}
12790
12791//===----------------------------------------------------------------------===//
12792// AVX5124VNNIW
12793//===----------------------------------------------------------------------===//
12794
// Assembler-only definitions (empty pattern lists), so mayLoad is stated
// explicitly in the enclosing let. $src1 is tied to $dst. The records reuse
// the SchedWriteFMA scheduling class.
12795let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12796    Constraints = "$src1 = $dst" in {
12797defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12798                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12799                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12800                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12801                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12802
12803defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12804                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12805                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12806                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12807                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12808}
12809
// Pattern-less pseudo instructions that store / load a VK16PAIR operand
// to / from memory. mayStore / mayLoad are set explicitly because there is
// no pattern to derive them from.
// NOTE(review): presumably expanded by a later pass — confirm where.
12810let hasSideEffects = 0 in {
12811  let mayStore = 1, SchedRW = [WriteFStoreX] in
12812  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12813  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12814  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12815}
12816
12817//===----------------------------------------------------------------------===//
12818// VP2INTERSECT
12819//===----------------------------------------------------------------------===//
12820
// Register (rr), full-vector memory (rm) and broadcast-memory (rmb) forms of
// vp2intersect (opcode 0x68). The destination uses _.KRPC, the mask register
// pair class of the vector type. The mnemonic is "vp2intersect" # _.Suffix.
//   sched - scheduling class; memory forms use Folded/ReadAfterFold.
//   _     - vector type info (RC, KRPC, VT, MemOp, ScalarMemOp, LdFrag,
//           BroadcastLdFrag, BroadcastStr, EltSize, Suffix).
12821multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12822  def rr : I<0x68, MRMSrcReg,
12823                  (outs _.KRPC:$dst),
12824                  (ins _.RC:$src1, _.RC:$src2),
12825                  !strconcat("vp2intersect", _.Suffix,
12826                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12827                  [(set _.KRPC:$dst, (X86vp2intersect
12828                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12829                  EVEX_4V, T8XD, Sched<[sched]>;
12830
12831  def rm : I<0x68, MRMSrcMem,
12832                  (outs _.KRPC:$dst),
12833                  (ins  _.RC:$src1, _.MemOp:$src2),
12834                  !strconcat("vp2intersect", _.Suffix,
12835                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12836                  [(set _.KRPC:$dst, (X86vp2intersect
12837                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12838                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12839                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12840
  // Broadcast form: the asm string appends _.BroadcastStr to the memory
  // operand in both syntax variants, and the encoding adds EVEX_B.
12841  def rmb : I<0x68, MRMSrcMem,
12842                  (outs _.KRPC:$dst),
12843                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12844                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12845                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12846                  [(set _.KRPC:$dst, (X86vp2intersect
12847                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12848                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12849                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12850}
12851
// Instantiates avx512_vp2intersect_modes for each vector width described by
// the type-info bundle `_`, selecting the per-width scheduling class from
// sched.
multiclass avx512_vp2intersect<X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo _> {
  // info512 variant: requires AVX512 and VP2INTERSECT.
  let Predicates = [HasAVX512, HasVP2INTERSECT] in {
    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
  }

  // info256 / info128 variants: additionally require VLX.
  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}
12861
// Dword and qword element variants; the qword variant adds VEX_W.
12862defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12863defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12864
// Instantiates avx512_binop_rm2 at all three vector widths for a binary
// operation whose source and destination vector types differ.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   sched        - per-width scheduling classes.
//   _SrcVTInfo   - source type info; also passed as the third type-info
//                  argument of avx512_binop_rm2.
//   _DstVTInfo   - destination type info.
//   OpNode       - node to match.
//   prd          - feature predicate; the 256/128-bit forms also need VLX.
//   IsCommutable - forwarded unchanged; defaults to 0.
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
  }

  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                     EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                     EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}
12887
// Two-source f32 -> bf16 conversion: f32 source vectors, bf16 destination
// vectors, gated on HasBF16 and declared non-commutable (final 0).
12888let ExeDomain = SSEPackedSingle in
12889defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12890                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12891                                        avx512vl_f32_info, avx512vl_bf16_info,
12892                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12893
12894// Truncate Float to BFloat16
// Defines the Z, Z128 and Z256 conversion records through avx512_vcvt_fp,
// followed by "x"/"y"-suffixed mnemonic aliases for the 128- and 256-bit
// source forms.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic (the aliases append "x" or "y").
//   sched     - per-width scheduling classes.
12895multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12896                             X86SchedWriteWidths sched> {
12897  let ExeDomain = SSEPackedSingle in {
  // Every width clears Uses and mayRaiseFPException.
12898  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12899    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12900                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12901  }
12902  let Predicates = [HasBF16, HasVLX] in {
12903    let Uses = []<Register>, mayRaiseFPException = 0 in {
    // Z128 passes null_frag for both operators, so this instantiation
    // supplies no operator to match; it also overrides the memory operand
    // (f128mem) and mask register class (VK4WM).
12904    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12905                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12906                               VK4WM>, EVEX_V128;
12907    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12908                               X86cvtneps2bf16, X86cvtneps2bf16,
12909                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12910    }
12911  } // Predicates = [HasBF16, HasVLX]
12912  } // ExeDomain = SSEPackedSingle
12913
  // Suffixed aliases. Both Z128 and Z256 write a VR128X destination; the
  // memory-operand aliases are restricted to the "intel" asm variant.
12914  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12915                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12916                  VR128X:$src), 0>;
12917  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12918                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12919                  f128mem:$src), 0, "intel">;
12920  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12921                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12922                  VR256X:$src), 0>;
12923  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12924                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12925                  f256mem:$src), 0, "intel">;
12926}
12927
// vcvtneps2bf16: instantiate the float -> bf16 truncation for all widths.
defm VCVTNEPS2BF16
    : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", SchedWriteCvtPD2PS>,
      T8XS, EVEX_CD8<32, CD8VF>;
12931
// Selection patterns that require both BF16 and VLX.
let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. The
  // instruction patterns of the 128-bit form have been disabled with
  // null_frag, so they are spelled out here.

  // Register source.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src),
                              (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src),
                              v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  // Full-vector load source.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src),
                              (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src),
                              v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  // 32-bit broadcast load source.
  def : Pat<(v8bf16 (X86cvtneps2bf16
                        (v4f32 (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;

  // 128-bit intrinsic.
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;

  // 256-bit intrinsic.
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  // bf16 broadcasts are selected to the word broadcast instructions.
  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  // TODO: No scalar broadcast, because scalar bf16 is not a legal type yet.
}
12985
// 512-bit bf16 broadcasts, selected to the word broadcast instructions.
let Predicates = [HasBF16] in {
  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;
  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;

  // TODO: No scalar broadcast, because scalar bf16 is not a legal type yet.
}
12994
// Body shared by all widths of the BF16 dot product. $src1 is tied to $dst.
//   _     - type info of the destination / tied operand.
//   src_v - type info of $src2 and $src3.
// Three maskable forms are produced: register (r), full-vector memory (m) and
// broadcast memory (mb).
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  // $src3 in a register.
  defm r : AVX512_maskable_3src<
               opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins src_v.RC:$src2, src_v.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
           EVEX_4V, Sched<[sched]>;

  // $src3 loaded as a full vector.
  defm m : AVX512_maskable_3src<
               opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins src_v.RC:$src2, src_v.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                             (src_v.LdFrag addr:$src3)))>,
           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // $src3 loaded through f32mem and broadcast. The assembly decoration comes
  // from the destination info's BroadcastStr.
  defm mb : AVX512_maskable_3src<
                opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins src_v.RC:$src2, f32mem:$src3),
                OpcodeStr,
                !strconcat("${src3}", _.BroadcastStr,", $src2"),
                !strconcat("$src2, ${src3}", _.BroadcastStr),
                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                              (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
            EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // Constraints = "$src1 = $dst"
13023
// Instantiates avx512_dpbf16ps_rm once per vector width. The 512-bit form is
// guarded by `prd` alone; the 256- and 128-bit forms also require HasVLX.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                              _.info512, src_v.info512>,
           EVEX_V512;

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                   _.info256, src_v.info256>,
                EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                   _.info128, src_v.info128>,
                EVEX_V128;
  }
}
13038
// vdpbf16ps: f32 destination info, bf16 source info, gated on HasBF16.
let ExeDomain = SSEPackedSingle in {
  defm VDPBF16PS
      : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                              avx512vl_f32_info, avx512vl_bf16_info, HasBF16>,
        T8XS, EVEX_CD8<32, CD8VF>;
}
13043
13044//===----------------------------------------------------------------------===//
13045// AVX512FP16
13046//===----------------------------------------------------------------------===//
13047
// vmovw: 16-bit moves between GPR / memory and the low word of an XMM
// register, together with the selection patterns that use them.
let Predicates = [HasFP16] in {
// Move word (r/m16) to packed word.
def VMOVW2SHrr
    : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
             "vmovw\t{$src, $dst|$dst, $src}", []>,
      T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm
    : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
             "vmovw\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst,
                   (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
      T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

// GR16 sources are first placed in the low 16 bits of an undefined 32-bit
// register, because VMOVW2SHrr takes a GR32 operand.
def : Pat<(f16 (bitconvert GR16:$src)),
          (f16 (COPY_TO_REGCLASS
                   (VMOVW2SHrr
                       (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                   FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
          (VMOVW2SHrr
              (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl
                     (scalar_to_vector (and GR32:$src, 0xffff)))),
          (VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector
                         undef,
                         (v4i32 (scalar_to_vector (and GR32:$src, 0xffff))),
                         (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                      (insert_subvector
                          undef,
                          (v4i32 (scalar_to_vector (and GR32:$src, 0xffff))),
                          (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;

def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
          (VMOVW2SHrr
              (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;

// The AVX 128-bit movw instruction writes zeros to the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)), (VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v32i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// 16-bit extending loads placed into a 32-bit element vector.
def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
          (VMOVWrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl
                     (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
          (VMOVWrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector
                         undef,
                         (v4i32 (scalar_to_vector
                                    (i32 (zextloadi16 addr:$src)))),
                         (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                      (insert_subvector
                          undef,
                          (v4i32 (scalar_to_vector
                                     (i32 (zextloadi16 addr:$src)))),
                          (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;

// Move word from xmm register to r/m16.
def VMOVSH2Wrr
    : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
             "vmovw\t{$src, $dst|$dst, $src}", []>,
      T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr
    : AVX512<0x7E, MRMDestMem, (outs), (ins i16mem:$dst, VR128X:$src),
             "vmovw\t{$src, $dst|$dst, $src}",
             [(store (i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
                     addr:$dst)]>,
      T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;

// The GR32 result of VMOVSH2Wrr is narrowed to its low 16 bits.
def : Pat<(i16 (bitconvert FR16X:$src)),
          (i16 (EXTRACT_SUBREG
                   (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
                   sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;

// Allow "vmovw" to use GR64. These forms carry no patterns.
let hasSideEffects = 0 in {
  def VMOVW64toSHrr
      : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
               "vmovw\t{$src, $dst|$dst, $src}", []>,
        T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
  def VMOVSHtoW64rr
      : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
               "vmovw\t{$src, $dst|$dst, $src}", []>,
        T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}
} // Predicates = [HasFP16]
13136
// Convert 16-bit float to i16/u16.
//
// The 512-bit form combines the plain conversion with the variant built from
// avx512_vcvt_fp_rc and OpNodeRnd; the 128/256-bit forms (HasVLX) have the
// plain conversion only. _Dst / _Src supply per-width type info.
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr,
                          SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                          AVX512VLVectorVTInfo _Dst,
                          AVX512VLVectorVTInfo _Src,
                          X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13156
// Convert 16-bit float to i16/u16 with truncation.
//
// Same layout as the non-truncating multiclass, except that the extra 512-bit
// variant is built from avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           AVX512VLVectorVTInfo _Dst,
                           AVX512VLVectorVTInfo _Src,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13175
// Packed half <-> 16-bit integer conversions. All of them use
// SchedWriteCvtPD2DQ and EVEX_CD8<16, CD8VF>; they differ in opcode, opcode
// prefix class, selection nodes and the direction of the type infos.

// half -> unsigned word.
defm VCVTPH2UW
    : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
                     X86cvtp2UIntRnd, avx512vl_i16_info, avx512vl_f16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VF>;

// unsigned word -> half.
defm VCVTUW2PH
    : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
                     X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5XD, EVEX_CD8<16, CD8VF>;

// half -> signed word, truncating.
defm VCVTTPH2W
    : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, X86cvttp2si,
                      X86cvttp2siSAE, avx512vl_i16_info, avx512vl_f16_info,
                      SchedWriteCvtPD2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VF>;

// half -> unsigned word, truncating.
defm VCVTTPH2UW
    : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, X86cvttp2ui,
                      X86cvttp2uiSAE, avx512vl_i16_info, avx512vl_f16_info,
                      SchedWriteCvtPD2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VF>;

// half -> signed word.
defm VCVTPH2W
    : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
                     X86cvtp2IntRnd, avx512vl_i16_info, avx512vl_f16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VF>;

// signed word -> half.
defm VCVTW2PH
    : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
                     X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5XS, EVEX_CD8<16, CD8VF>;
13200
// Convert Half to Signed/Unsigned Doubleword.
//
// The 128-bit form uses the v8f16x_info source with an explicit "{1to4}"
// broadcast string and f64mem as its memory operand.
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to4}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13218
// Convert Half to Signed/Unsigned Doubleword with truncation.
//
// Same layout as the non-truncating multiclass, except that the extra 512-bit
// variant is built from avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc.
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to4}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13236
13237
// Packed half -> 32-bit integer conversions. All of them use
// SchedWriteCvtPS2DQ and EVEX_CD8<16, CD8VH>.

// half -> signed dword.
defm VCVTPH2DQ
    : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                      X86cvtp2IntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VH>;

// half -> unsigned dword.
defm VCVTPH2UDQ
    : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
                      X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VH>;

// half -> signed dword, truncating.
defm VCVTTPH2DQ
    : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si, X86cvttp2si,
                       X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
      T_MAP5XS, EVEX_CD8<16, CD8VH>;

// half -> unsigned dword, truncating.
defm VCVTTPH2UDQ
    : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui, X86cvttp2ui,
                       X86cvttp2uiSAE, SchedWriteCvtPS2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VH>;
13254
// Convert Half to Signed/Unsigned Quadword.
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // The broadcast string is given explicitly, since only 2 elements of the
    // v8f16x_info source are taken.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to2}", "", f32mem>,
                EVEX_V128;
    // The broadcast string is given explicitly, since only 4 elements of the
    // v8f16x_info source are taken.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM,
                               "{1to4}", "", f64mem>,
                EVEX_V256;
  }
}
13278
// Convert Half to Signed/Unsigned Quadword with truncation.
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // The broadcast string is given explicitly, since only 2 elements of the
    // v8f16x_info source are taken.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to2}", "", f32mem>,
                EVEX_V128;
    // The broadcast string is given explicitly, since only 4 elements of the
    // v8f16x_info source are taken.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM,
                               "{1to4}", "", f64mem>,
                EVEX_V256;
  }
}
13300
// Packed half -> 64-bit integer conversions. All of them use
// SchedWriteCvtPS2DQ, T_MAP5PD and EVEX_CD8<16, CD8VQ>.

// half -> signed qword.
defm VCVTPH2QQ
    : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                      X86cvtp2IntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;

// half -> unsigned qword.
defm VCVTPH2UQQ
    : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                      X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;

// half -> signed qword, truncating.
defm VCVTTPH2QQ
    : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si, X86cvttp2si,
                       X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;

// half -> unsigned qword, truncating.
defm VCVTTPH2UQQ
    : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui, X86cvttp2ui,
                       X86cvttp2uiSAE, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;
13318
// Convert Signed/Unsigned Quadword to Half.
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // We need "x"/"y"/"z" suffixes in order to distinguish between the 128, 256
  // and 512 memory forms of these instructions in the Asm Parser. They all
  // have the same dest type - 'v8f16x_info'. The broadcast string is also
  // specified explicitly for the same reason.
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info,
                          OpNode, MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  // The 128/256-bit forms pass null_frag for both pattern operators and an
  // explicit write-mask class.
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM,
                               "{1to2}", "{x}", i128mem, VK2WM>,
                EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM,
                               "{1to4}", "{y}", i256mem, VK4WM>,
                EVEX_V256, NotEVEX2VEXConvertible;
  }

  // "x"-suffixed aliases for the 128-bit register and broadcast forms,
  // registered for the "att" variant.
  def : InstAlias<OpcodeStr # "x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr")
                      VR128X:$dst, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk")
                      VR128X:$dst, VK2WM:$mask, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz")
                      VR128X:$dst, VK2WM:$mask, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb")
                      VR128X:$dst, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk")
                      VR128X:$dst, VK2WM:$mask, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz")
                      VR128X:$dst, VK2WM:$mask, i64mem:$src),
                  0, "att">;

  // "y"-suffixed aliases for the 256-bit register and broadcast forms.
  def : InstAlias<OpcodeStr # "y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr")
                      VR128X:$dst, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk")
                      VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz")
                      VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb")
                      VR128X:$dst, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk")
                      VR128X:$dst, VK4WM:$mask, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz")
                      VR128X:$dst, VK4WM:$mask, i64mem:$src),
                  0, "att">;

  // "z"-suffixed aliases for the 512-bit register and broadcast forms.
  def : InstAlias<OpcodeStr # "z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr")
                      VR128X:$dst, VR512:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk")
                      VR128X:$dst, VK8WM:$mask, VR512:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz")
                      VR128X:$dst, VK8WM:$mask, VR512:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb")
                      VR128X:$dst, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk")
                      VR128X:$dst, VK8WM:$mask, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz")
                      VR128X:$dst, VK8WM:$mask, i64mem:$src),
                  0, "att">;
}
13411
// Packed 64-bit integer -> half conversions (signed, then unsigned).
defm VCVTQQ2PH
    : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                      X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
      VEX_W, T_MAP5PS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PH
    : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                      X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
      VEX_W, T_MAP5XD, EVEX_CD8<64, CD8VF>;
13419
// Convert half to signed/unsigned int 32/64.
// The 64-bit integer variants add VEX_W and use the "{q}" suffix; the 32-bit
// variants use "{l}".
defm VCVTSH2SIZ
    : avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                             X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}",
                             HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z
    : avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                             X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}",
                             HasFP16>,
      T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ
    : avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                             X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}",
                             HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z
    : avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                             X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}",
                             HasFP16>,
      T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;

// Truncating variants.
defm VCVTTSH2SIZ
    : avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                       any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                       WriteCvtSS2I, "{l}", HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z
    : avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                       any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                       WriteCvtSS2I, "{q}", HasFP16>,
      VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ
    : avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                       any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                       WriteCvtSS2I, "{l}", HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z
    : avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                       any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                       WriteCvtSS2I, "{q}", HasFP16>,
      T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13446
// Integer -> scalar half conversions (cvtsi2sh / cvtusi2sh) and the selection
// patterns that use them. Everything in this block is predicated on HasFP16.
13447let Predicates = [HasFP16] in {
  // Four instantiations of avx512_vcvtsi_common over v8f16x_info:
  //   signed   (opcode 0x2A, X86SintToFp / X86SintToFpRnd)
  //   unsigned (opcode 0x7B, X86UintToFp / X86UintToFpRnd)
  // each in a GR32 / i32mem / loadi32 / "l" flavour and a
  // GR64 / i64mem / loadi64 / "q" flavour. The 64-bit flavours add VEX_W and
  // use EVEX_CD8<64, CD8VT1> instead of EVEX_CD8<32, CD8VT1>.
13448  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13449                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13450                                   T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13451  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13452                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13453                                   T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
13454  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13455                                    v8f16x_info, i32mem, loadi32,
13456                                    "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13457  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13458                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13459                                    T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Suffix-less mnemonic with a memory operand maps to the 32-bit memory
  // (_Int) form. The aliases are restricted to the "att" variant, and the 0
  // is the InstAlias Emit argument.
  // NOTE(review): Emit = 0 presumably makes these parse-only (never used when
  // printing) - confirm against the InstAlias class definition.
13460  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13461              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13462
13463  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13464              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13465
13466
  // Scalar f16 results of any_sint_to_fp / any_uint_to_fp select the
  // non-_Int rm/rr forms. The first instruction operand is filled with
  // (f16 (IMPLICIT_DEF)) because the scalar pattern has no value to supply
  // for it.
13467  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13468            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13469  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13470            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13471
13472  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13473            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13474  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13475            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13476
13477  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13478            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13479  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13480            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13481
13482  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13483            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13484  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13485            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13486
13487  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13488  // which produce unnecessary vmovsh instructions
  // Each pattern below matches
  //   X86Movsh($dst, scalar_to_vector(int-to-fp(src)))
  // and selects the corresponding _Int instruction with $dst as its vector
  // operand, so no separate move is emitted. There are eight patterns:
  // {signed, unsigned} x {64-bit, 32-bit} x {register, load}.
13489  def : Pat<(v8f16 (X86Movsh
13490                     (v8f16 VR128X:$dst),
13491                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13492            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13493
13494  def : Pat<(v8f16 (X86Movsh
13495                     (v8f16 VR128X:$dst),
13496                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13497            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13498
13499  def : Pat<(v8f16 (X86Movsh
13500                     (v8f16 VR128X:$dst),
13501                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13502            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13503
13504  def : Pat<(v8f16 (X86Movsh
13505                     (v8f16 VR128X:$dst),
13506                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13507            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13508
13509  def : Pat<(v8f16 (X86Movsh
13510                     (v8f16 VR128X:$dst),
13511                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13512            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13513
13514  def : Pat<(v8f16 (X86Movsh
13515                     (v8f16 VR128X:$dst),
13516                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13517            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13518
13519  def : Pat<(v8f16 (X86Movsh
13520                     (v8f16 VR128X:$dst),
13521                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13522            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13523
13524  def : Pat<(v8f16 (X86Movsh
13525                     (v8f16 VR128X:$dst),
13526                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13527            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13528} // Predicates = [HasFP16]
13529
// Selection patterns for VCVTQQ2PH / VCVTUQQ2PH at 256-bit and 128-bit source
// widths, predicated on HasFP16 and HasVLX.
//
// Layout, repeated for signed then unsigned, and for Z256 then Z128:
//   * source forms: register (rr), full vector load (rm), 64-bit broadcast
//     load via X86VBroadcastld64 (rmb);
//   * for each source form: an unmasked pattern, a pattern whose pass-through
//     operand is a register $src0 (selects the "k" instruction), and a pattern
//     whose pass-through operand is ImmAllZerosV (selects the "kz"
//     instruction).
// In every pattern the result / pass-through type is v8f16 in VR128X, while
// the mask register class follows the source element count: VK4WM for v4i64
// sources and VK2WM for v2i64 sources.
13530let Predicates = [HasFP16, HasVLX] in {
13531  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13532  // patterns have been disabled with null_frag.
13533  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13534            (VCVTQQ2PHZ256rr VR256X:$src)>;
13535  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13536                           VK4WM:$mask),
13537            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13538  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13539                           VK4WM:$mask),
13540            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13541
13542  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13543            (VCVTQQ2PHZ256rm addr:$src)>;
13544  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13545                           VK4WM:$mask),
13546            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13547  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13548                           VK4WM:$mask),
13549            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13550
13551  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13552            (VCVTQQ2PHZ256rmb addr:$src)>;
13553  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13554                           (v8f16 VR128X:$src0), VK4WM:$mask),
13555            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13556  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13557                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13558            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13559
  // Signed, 128-bit (v2i64) source.
13560  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13561            (VCVTQQ2PHZ128rr VR128X:$src)>;
13562  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13563                           VK2WM:$mask),
13564            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13565  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13566                           VK2WM:$mask),
13567            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13568
13569  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13570            (VCVTQQ2PHZ128rm addr:$src)>;
13571  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13572                           VK2WM:$mask),
13573            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13574  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13575                           VK2WM:$mask),
13576            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13577
13578  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13579            (VCVTQQ2PHZ128rmb addr:$src)>;
13580  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13581                           (v8f16 VR128X:$src0), VK2WM:$mask),
13582            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13583  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13584                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13585            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13586
13587  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13588  // patterns have been disabled with null_frag.
13589  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13590            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13591  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13592                           VK4WM:$mask),
13593            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13594  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13595                           VK4WM:$mask),
13596            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13597
13598  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13599            (VCVTUQQ2PHZ256rm addr:$src)>;
13600  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13601                           VK4WM:$mask),
13602            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13603  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13604                           VK4WM:$mask),
13605            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13606
13607  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13608            (VCVTUQQ2PHZ256rmb addr:$src)>;
13609  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13610                           (v8f16 VR128X:$src0), VK4WM:$mask),
13611            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13612  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13613                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13614            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13615
  // Unsigned, 128-bit (v2i64) source.
13616  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13617            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13618  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13619                           VK2WM:$mask),
13620            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13621  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13622                           VK2WM:$mask),
13623            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13624
13625  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13626            (VCVTUQQ2PHZ128rm addr:$src)>;
13627  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13628                           VK2WM:$mask),
13629            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13630  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13631                           VK2WM:$mask),
13632            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13633
13634  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13635            (VCVTUQQ2PHZ128rmb addr:$src)>;
13636  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13637                           (v8f16 VR128X:$src0), VK2WM:$mask),
13638            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13639  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13640                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13641            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13642}
13643
// avx512_cfmaop_rm: register, memory and broadcast-memory forms of a
// three-source operation, each built with AVX512_maskable_3src.
//
// Template parameters:
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SDNode used in the selection pattern.
//   _            - X86VectorVTInfo describing the vector type.
//   IsCommutable - forwarded to the register form only.
//
// The enclosing Constraints string ties $src1 to $dst and marks $dst as
// earlyclobber for every form defined here.
// NOTE(review): earlyclobber presumably exists so that $dst is not allocated
// to the same register as $src2/$src3 - confirm against the ISA description.
13644let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13645  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    // Register form. Note the pattern operand order: the tied operand $src1
    // is passed to OpNode last, after $src2 and $src3.
13646    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13647            (ins _.RC:$src2, _.RC:$src3),
13648            OpcodeStr, "$src3, $src2", "$src2, $src3",
13649            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
13650
    // Full-vector memory form: $src3 is loaded with _.LdFrag.
13651    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13652            (ins _.RC:$src2, _.MemOp:$src3),
13653            OpcodeStr, "$src3, $src2", "$src2, $src3",
13654            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
13655
    // Broadcast memory form: $src3 is a scalar memory operand loaded with
    // _.BroadcastLdFrag; the asm string appends _.BroadcastStr and the
    // encoding adds EVEX_B.
13656    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13657            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13658            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13659            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
13660  }
13661} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13662
// avx512_cfmaop_round: register form with an explicit rounding-control
// operand ($rc, of operand class AVX512RC).
//
// Template parameters:
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode taking ($src2, $src3, $src1, rc); $rc is matched as
//               (i32 timm).
//   _         - X86VectorVTInfo describing the vector type.
//
// Uses the same "$src1 = $dst" tie and earlyclobber on $dst as the
// non-rounding forms, and adds EVEX_B and EVEX_RC to the encoding.
13663multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13664                                 X86VectorVTInfo _> {
13665  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13666  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13667          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13668          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13669          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13670          EVEX_4V, EVEX_B, EVEX_RC;
13671}
13672
13673
// avx512_cfmaop_common: instantiates the three-source forms at all three
// vector lengths.
//
// Template parameters:
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SDNode for the non-rounding forms.
//   OpNodeRnd    - SDNode for the rounding-control form.
//   IsCommutable - forwarded to avx512_cfmaop_rm.
//
//   Z    (HasFP16):         avx512_cfmaop_rm + avx512_cfmaop_round on
//                           v16f32_info, EVEX_V512, Sched WriteFMAZ.
//   Z256 (HasVLX, HasFP16): avx512_cfmaop_rm only on v8f32x_info, WriteFMAY.
//   Z128 (HasVLX, HasFP16): avx512_cfmaop_rm only on v4f32x_info, WriteFMAX.
// Only the 512-bit variant gets the rounding-control form.
// NOTE(review): f32 vector infos are used under an FP16 predicate, presumably
// because each complex half-precision pair is handled as one 32-bit element -
// confirm.
13674multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13675  let Predicates = [HasFP16] in {
13676    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13677                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13678                      EVEX_V512, Sched<[WriteFMAZ]>;
13679  }
13680  let Predicates = [HasVLX, HasFP16] in {
13681    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13682    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13683  }
13684}
13685
// avx512_cfmulop_common: two-source counterpart of avx512_cfmaop_common,
// built from the generic avx512_fp_packed / avx512_fp_round_packed
// multiclasses.
//
// Template parameters:
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SDNode for the unmasked pattern.
//   MaskOpNode   - SDNode for the masked patterns.
//   OpNodeRnd    - SDNode for the rounding-control form (512-bit only).
//   IsCommutable - passed twice to avx512_fp_packed.
//
// Every instantiation passes "@earlyclobber $dst" as the constraint string;
// there is no tied source operand here.
//   Z    (HasFP16):         v16f32_info, WriteFMAZ, plus the rounding form.
//   Z256 (HasVLX, HasFP16): v8f32x_info, WriteFMAY.
//   Z128 (HasVLX, HasFP16): v4f32x_info, WriteFMAX.
// NOTE(review): the meaning of the empty-string argument and the trailing 0
// passed to avx512_fp_packed is defined outside this chunk - check that
// multiclass before changing them.
13686multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13687                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13688  let Predicates = [HasFP16] in {
13689    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13690                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13691                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13692                                       "", "@earlyclobber $dst">, EVEX_V512;
13693  }
13694  let Predicates = [HasVLX, HasFP16] in {
13695    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13696                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13697    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13698                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13699  }
13700}
13701
13702
// Packed complex multiply-add and multiply instructions. All of them are
// declared as using MXCSR.
//
//   name        opcode  map        IsCommutable
//   VFMADDCPH   0x56    T_MAP6XS   1
//   VFCMADDCPH  0x56    T_MAP6XD   0
//   VFMULCPH    0xD6    T_MAP6XS   1
//   VFCMULCPH   0xD6    T_MAP6XD   0
//
// Each plain/"c" pair shares an opcode and differs only in the XS vs XD map
// mixin. For the multiply forms the same SDNode is passed as both OpNode and
// MaskOpNode. All four use EVEX_CD8<32, CD8VF>.
13703let Uses = [MXCSR] in {
13704  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13705                                    T_MAP6XS, EVEX_CD8<32, CD8VF>;
13706  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13707                                    T_MAP6XD, EVEX_CD8<32, CD8VF>;
13708
13709  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13710                                         x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
13711  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13712                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
13713}
13714
13715
// avx512_cfmaop_sh_common: scalar three-source forms, all operating on VR128X
// with v4f32x_info and predicated on HasFP16.
//
// Template parameters:
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SDNode for the r and m forms.
//   OpNodeRnd    - SDNode for the rounding-control (rb) form.
//   IsCommutable - forwarded to the register form only.
//
// As in the packed version, $src1 is tied to $dst, $dst is earlyclobber, and
// $src1 is passed to the node after $src2 and $src3.
//   r  - register source, Sched WriteFMAX.
//   m  - ssmem source loaded through sse_load_f32, folded-load scheduling.
//   rb - register source plus AVX512RC:$rc, with EVEX_B and EVEX_RC.
13716multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13717                                   bit IsCommutable> {
13718  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13719    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13720                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13721                        "$src3, $src2", "$src2, $src3",
13722                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13723                        Sched<[WriteFMAX]>;
13724    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13725                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13726                        "$src3, $src2", "$src2, $src3",
13727                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13728                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13729    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13730                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13731                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13732                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13733                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13734  }
13735}
13736
// avx512_cfmbinop_sh_common: scalar two-source forms, built with
// AVX512_maskable on f32x_info and predicated on HasFP16.
//
// Template parameters:
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SDNode for the rr and rm forms.
//   OpNodeRnd    - SDNode for the rounding-control (rrb) form.
//   IsCommutable - passed three times to the rr form; rm and rrb pass 0, 0, 0.
//
// All three forms pass X86selects as the select node and
// "@earlyclobber $dst" as the constraint string. Patterns are typed v4f32 on
// VR128X even though the type info is f32x_info.
//   rr  - register sources, Sched WriteFMAX.
//   rm  - ssmem second source via sse_load_f32, folded-load scheduling.
//   rrb - register sources plus AVX512RC:$rc, with EVEX_B and EVEX_RC.
13737multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13738                                     SDNode OpNodeRnd, bit IsCommutable> {
13739  let Predicates = [HasFP16] in {
13740    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13741                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13742                        "$src2, $src1", "$src1, $src2",
13743                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13744                        IsCommutable, IsCommutable, IsCommutable,
13745                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13746    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13747                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13748                        "$src2, $src1", "$src1, $src2",
13749                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13750                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13751                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13752    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13753                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13754                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13755                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13756                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13757                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13758  }
13759}
13760
// Scalar complex multiply-add and multiply instructions. All of them are
// declared as using MXCSR.
//
//   name         opcode  map        IsCommutable
//   VFMADDCSHZ   0x57    T_MAP6XS   1
//   VFCMADDCSHZ  0x57    T_MAP6XD   0
//   VFMULCSHZ    0xD7    T_MAP6XS   1
//   VFCMULCSHZ   0xD7    T_MAP6XD   0
//
// All four use EVEX_CD8<32, CD8VT1>, EVEX_V128 and EVEX_4V.
// NOTE(review): the two multiply forms also carry VEX_LIG while the two
// multiply-add forms do not - confirm this asymmetry is intended.
13761let Uses = [MXCSR] in {
13762  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13763                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13764  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13765                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13766
13767  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13768                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13769  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13770                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13771}
13772