//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
    }

  // Zero masking does not add any restrictions to the commute-operands
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
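
// For illustration only (hypothetical mnemonic "vfoo", not a definition in
// this file): for an instantiation whose unmasked AT&T assembly is
//   vfoo %zmm2, %zmm1, %zmm0
// the three variants generated above correspond to the usual AVX-512 forms:
//   vfoo %zmm2, %zmm1, %zmm0             (NAME,    unmasked)
//   vfoo %zmm2, %zmm1, %zmm0 {%k1}       (NAME#k,  merge masking, EVEX_K)
//   vfoo %zmm2, %zmm1, %zmm0 {%k1} {z}   (NAME#kz, zero masking, EVEX_KZ)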


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           string ClobberConstraint = "",
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;
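
// A minimal, hypothetical sketch of how AVX512_maskable is typically
// instantiated (kept as a comment; VFOOPSZ/"vfoops" are invented names used
// only for illustration -- real uses appear throughout this file, e.g. via
// AVX512_maskable_split in vinsert_for_size_split below):
//
//   defm VFOOPSZ : AVX512_maskable<0x58, MRMSrcReg, v16f32_info,
//                      (outs VR512:$dst), (ins VR512:$src1, VR512:$src2),
//                      "vfoops", "$src2, $src1", "$src1, $src2",
//                      (v16f32 (fadd VR512:$src1, VR512:$src2))>, EVEX, VVVV;
//
// This would expand to VFOOPSZ, VFOOPSZk and VFOOPSZkz with the merge- and
// zero-masking patterns built from vselect_mask as described above.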

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put their result in a mask register,
// such as "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
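
// Note: in AVX512_maskable_cmp above, the write-mask is modeled as an explicit
// AND of $mask with RHS_su (conventionally a single-use form of the compare
// pattern RHS), since these instructions write a mask register rather than a
// vector destination and so cannot use vselect_mask.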

// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   EVEX_V256, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
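
// Note: each vinsert_for_type instantiation above expands, through the rr/rm
// defms in vinsert_for_size_split, into register and memory forms plus their
// masked variants, e.g. VINSERTF32x4Zrr/rrk/rrkz and VINSERTF32x4Zrm/rmk/rmkz.
// The lowering patterns below refer to these instructions by those
// concatenated names (InstrStr # "rr", "rm", ...).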

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                    EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
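
// As with VINSERT above, each vextract_for_type instantiation expands into a
// register form with masked variants (e.g. VEXTRACTF32x4Zrr/rrk/rrkz) plus the
// mr/mrk store forms; the lowering patterns below refer to them by these
// concatenated names.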

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16bf16x_info, v8bf16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32bf16_info, v8bf16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
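
// Worked example for the patterns above: extracting v2i64 at index 2 from a
// v8i64 selects elements [2,3], i.e. bits [255:128] of the 512-bit source.
// That is done by first taking the low 256-bit subregister (sub_ymm) and then
// doing a 128-bit extract with immediate 1, which either is already VEX
// encodable (VEXTRACTI128rr/VEXTRACTF128rr) or can later be compressed from
// EVEX to VEX (the Z256 forms).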
892
893
894// Additional patterns for handling a bitcast between the vselect and the
895// extract_subvector.
896multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
897                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
898                                  PatFrag vextract_extract,
899                                  SDNodeXForm EXTRACT_get_vextract_imm,
900                                  list<Predicate> p> {
901let Predicates = p in {
902  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
903                                   (bitconvert
904                                    (To.VT (vextract_extract:$ext
905                                            (From.VT From.RC:$src), (iPTR imm)))),
906                                   To.RC:$src0)),
907            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
908                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
909                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
910
911  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
912                                   (bitconvert
913                                    (To.VT (vextract_extract:$ext
914                                            (From.VT From.RC:$src), (iPTR imm)))),
915                                   Cast.ImmAllZerosV)),
916            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
917                      Cast.KRCWM:$mask, From.RC:$src,
918                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
919}
920}
921
922defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
923                              v4f32x_info, vextract128_extract,
924                              EXTRACT_get_vextract128_imm, [HasVLX]>;
925defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
926                              v2f64x_info, vextract128_extract,
927                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
928
929defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
930                              v4i32x_info, vextract128_extract,
931                              EXTRACT_get_vextract128_imm, [HasVLX]>;
932defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
933                              v4i32x_info, vextract128_extract,
934                              EXTRACT_get_vextract128_imm, [HasVLX]>;
935defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
936                              v4i32x_info, vextract128_extract,
937                              EXTRACT_get_vextract128_imm, [HasVLX]>;
938defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
939                              v2i64x_info, vextract128_extract,
940                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
941defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
942                              v2i64x_info, vextract128_extract,
943                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
944defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
945                              v2i64x_info, vextract128_extract,
946                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
947
948defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
949                              v4f32x_info, vextract128_extract,
950                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
951defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
952                              v2f64x_info, vextract128_extract,
953                              EXTRACT_get_vextract128_imm, [HasDQI]>;
954
955defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
956                              v4i32x_info, vextract128_extract,
957                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
958defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
959                              v4i32x_info, vextract128_extract,
960                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
961defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
962                              v4i32x_info, vextract128_extract,
963                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
964defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
965                              v2i64x_info, vextract128_extract,
966                              EXTRACT_get_vextract128_imm, [HasDQI]>;
967defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
968                              v2i64x_info, vextract128_extract,
969                              EXTRACT_get_vextract128_imm, [HasDQI]>;
970defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
971                              v2i64x_info, vextract128_extract,
972                              EXTRACT_get_vextract128_imm, [HasDQI]>;
973
974defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
975                              v8f32x_info, vextract256_extract,
976                              EXTRACT_get_vextract256_imm, [HasDQI]>;
977defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
978                              v4f64x_info, vextract256_extract,
979                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
980
981defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
982                              v8i32x_info, vextract256_extract,
983                              EXTRACT_get_vextract256_imm, [HasDQI]>;
984defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
985                              v8i32x_info, vextract256_extract,
986                              EXTRACT_get_vextract256_imm, [HasDQI]>;
987defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
988                              v8i32x_info, vextract256_extract,
989                              EXTRACT_get_vextract256_imm, [HasDQI]>;
990defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
991                              v4i64x_info, vextract256_extract,
992                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
993defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
994                              v4i64x_info, vextract256_extract,
995                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
996defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
997                              v4i64x_info, vextract256_extract,
998                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
999
1000// vextractps - extract 32 bits from XMM
1001def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1002      (ins VR128X:$src1, u8imm:$src2),
1003      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1004      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1005      EVEX, WIG, Sched<[WriteVecExtract]>;
1006
1007def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1008      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1009      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1010      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1011                          addr:$dst)]>,
1012      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
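// For illustration only (not part of the generated output), the two
// definitions above correspond to assembly of the form (AT&T syntax,
// arbitrary register choices):
//   vextractps $1, %xmm0, %eax     // VEXTRACTPSZrr: dword 1 of xmm0 -> eax
//   vextractps $3, %xmm0, (%rdi)   // VEXTRACTPSZmr: dword 3 of xmm0 -> [rdi]
// The imm8 selects which 32-bit element of the source XMM register is moved.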
1013
1014//===---------------------------------------------------------------------===//
1015// AVX-512 BROADCAST
1016//---
1017// broadcast with a scalar argument.
1018multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1019                                   X86VectorVTInfo SrcInfo> {
1020  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1021            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1022             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1023  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1024                                       (X86VBroadcast SrcInfo.FRC:$src),
1025                                       DestInfo.RC:$src0)),
1026            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1027             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1028             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1029  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1030                                       (X86VBroadcast SrcInfo.FRC:$src),
1031                                       DestInfo.ImmAllZerosV)),
1032            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1033             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1034}
1035
1036// Split version to allow mask and broadcast node to be different types. This
1037// helps support the 32x2 broadcasts.
1038multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1039                                     SchedWrite SchedRR, SchedWrite SchedRM,
1040                                     X86VectorVTInfo MaskInfo,
1041                                     X86VectorVTInfo DestInfo,
1042                                     X86VectorVTInfo SrcInfo,
1043                                     bit IsConvertibleToThreeAddress,
1044                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1045                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1046  let hasSideEffects = 0 in
1047  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1048                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1049                    [(set MaskInfo.RC:$dst,
1050                      (MaskInfo.VT
1051                       (bitconvert
1052                        (DestInfo.VT
1053                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1054                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
1055  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1056                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1057                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1058                       "${dst} {${mask}} {z}, $src}"),
1059                       [(set MaskInfo.RC:$dst,
1060                         (vselect_mask MaskInfo.KRCWM:$mask,
1061                          (MaskInfo.VT
1062                           (bitconvert
1063                            (DestInfo.VT
1064                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1065                          MaskInfo.ImmAllZerosV))],
1066                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1067  let Constraints = "$src0 = $dst" in
1068  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1069                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1070                          SrcInfo.RC:$src),
1071                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1072                     "${dst} {${mask}}, $src}"),
1073                     [(set MaskInfo.RC:$dst,
1074                       (vselect_mask MaskInfo.KRCWM:$mask,
1075                        (MaskInfo.VT
1076                         (bitconvert
1077                          (DestInfo.VT
1078                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1079                        MaskInfo.RC:$src0))],
1080                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1081
1082  let hasSideEffects = 0, mayLoad = 1 in
1083  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1084                    (ins SrcInfo.ScalarMemOp:$src),
1085                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1086                    [(set MaskInfo.RC:$dst,
1087                      (MaskInfo.VT
1088                       (bitconvert
1089                        (DestInfo.VT
1090                         (UnmaskedBcastOp addr:$src)))))],
1091                    DestInfo.ExeDomain>, T8, PD, EVEX,
1092                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1093
1094  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1095                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1096                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1097                       "${dst} {${mask}} {z}, $src}"),
1098                       [(set MaskInfo.RC:$dst,
1099                         (vselect_mask MaskInfo.KRCWM:$mask,
1100                          (MaskInfo.VT
1101                           (bitconvert
1102                            (DestInfo.VT
1103                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1104                          MaskInfo.ImmAllZerosV))],
1105                       DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
1106                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1107
1108  let Constraints = "$src0 = $dst",
1109      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1110  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1111                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1112                          SrcInfo.ScalarMemOp:$src),
1113                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1114                     "${dst} {${mask}}, $src}"),
1115                     [(set MaskInfo.RC:$dst,
1116                       (vselect_mask MaskInfo.KRCWM:$mask,
1117                        (MaskInfo.VT
1118                         (bitconvert
1119                          (DestInfo.VT
1120                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1121                        MaskInfo.RC:$src0))],
1122                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
1123                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1124}
1125
1126// Helper class to force mask and broadcast result to same type.
1127multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1128                               SchedWrite SchedRR, SchedWrite SchedRM,
1129                               X86VectorVTInfo DestInfo,
1130                               X86VectorVTInfo SrcInfo,
1131                               bit IsConvertibleToThreeAddress> :
1132  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1133                            DestInfo, DestInfo, SrcInfo,
1134                            IsConvertibleToThreeAddress>;
1135
1136multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1137                                  AVX512VLVectorVTInfo _> {
1138  let Predicates = [HasAVX512] in {
1139    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1140                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1141              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1142              EVEX_V512;
1143  }
1144
1145  let Predicates = [HasVLX] in {
1146    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1147                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1148                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1149                 EVEX_V256;
1150  }
1151}
1152
1153multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1154                                  AVX512VLVectorVTInfo _> {
1155  let Predicates = [HasAVX512] in {
1156    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1157                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1158              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1159              EVEX_V512;
1160  }
1161
1162  let Predicates = [HasVLX] in {
1163    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1164                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1165                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1166                 EVEX_V256;
1167    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1168                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1169                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1170                 EVEX_V128;
1171  }
1172}
1173defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1174                                       avx512vl_f32_info>;
1175defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1176                                       avx512vl_f64_info>, REX_W;
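// Illustrative assembly for the expansions above (register choices are
// arbitrary examples, not exhaustive):
//   vbroadcastss %xmm0, %zmm1              // VBROADCASTSSZrr
//   vbroadcastss (%rdi), %zmm1 {%k1} {z}   // VBROADCASTSSZrmkz
//   vbroadcastsd %xmm0, %ymm2 {%k1}        // VBROADCASTSDZ256rrk
// Each form splats the low scalar element of the source to every destination
// element, with optional merge- or zero-masking.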
1177
1178multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1179                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1180                                    RegisterClass SrcRC> {
1181  // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1182  let ExeDomain = _.ExeDomain in
1183  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1184                          (ins SrcRC:$src),
1185                          "vpbroadcast"#_.Suffix, "$src", "$src",
1186                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1187                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1188                          T8, PD, EVEX, Sched<[SchedRR]>;
1189}
1190
1191multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1192                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1193                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1194  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1195  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1196                         (outs _.RC:$dst), (ins GR32:$src),
1197                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1198                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1199                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1200                         "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;
1201
1202  def : Pat <(_.VT (OpNode SrcRC:$src)),
1203             (!cast<Instruction>(Name#rr)
1204              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1205
1206  // Fold with a mask even if the broadcast has multiple uses, since it is cheap.
1207  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1208             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1209              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1210
1211  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1212             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1213              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1214}
1215
1216multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1217                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1218                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1219  let Predicates = [prd] in
1220    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1221              OpNode, SrcRC, Subreg>, EVEX_V512;
1222  let Predicates = [prd, HasVLX] in {
1223    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1224              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1225    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1226              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1227  }
1228}
1229
1230multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1231                                       SDPatternOperator OpNode,
1232                                       RegisterClass SrcRC, Predicate prd> {
1233  let Predicates = [prd] in
1234    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1235                                      SrcRC>, EVEX_V512;
1236  let Predicates = [prd, HasVLX] in {
1237    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1238                                         SrcRC>, EVEX_V256;
1239    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1240                                         SrcRC>, EVEX_V128;
1241  }
1242}
1243
1244defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1245                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1246defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1247                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1248                       HasBWI>;
1249defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1250                                                 X86VBroadcast, GR32, HasAVX512>;
1251defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1252                                                 X86VBroadcast, GR64, HasAVX512>, REX_W;
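// Illustrative forms for the GPR-source broadcasts defined above (AT&T syntax):
//   vpbroadcastb %eax, %xmm0     // low byte of eax splat to every byte
//   vpbroadcastw %eax, %ymm1     // low word of eax splat to every word
//   vpbroadcastd %eax, %zmm2     // eax splat to every dword
//   vpbroadcastq %rax, %zmm3     // rax splat to every qword
// The byte/word variants take a GR32 operand; the patterns above insert the
// GR8/GR16 value into a GR32 with INSERT_SUBREG before selecting the instruction.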
1253
1254multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1255                                      AVX512VLVectorVTInfo _, Predicate prd,
1256                                      bit IsConvertibleToThreeAddress> {
1257  let Predicates = [prd] in {
1258    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1259                                   WriteShuffle256Ld, _.info512, _.info128,
1260                                   IsConvertibleToThreeAddress>,
1261                                  EVEX_V512;
1262  }
1263  let Predicates = [prd, HasVLX] in {
1264    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1265                                    WriteShuffle256Ld, _.info256, _.info128,
1266                                    IsConvertibleToThreeAddress>,
1267                                 EVEX_V256;
1268    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1269                                    WriteShuffleXLd, _.info128, _.info128,
1270                                    IsConvertibleToThreeAddress>,
1271                                 EVEX_V128;
1272  }
1273}
1274
1275defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1276                                           avx512vl_i8_info, HasBWI, 0>;
1277defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1278                                           avx512vl_i16_info, HasBWI, 0>;
1279defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1280                                           avx512vl_i32_info, HasAVX512, 1>;
1281defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1282                                           avx512vl_i64_info, HasAVX512, 1>, REX_W;
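// Illustrative forms for the vector/memory-source broadcasts above:
//   vpbroadcastd %xmm0, %zmm1 {%k1}        // splat dword 0 of xmm0, merge-masked
//   vpbroadcastw (%rdi), %ymm2 {%k2} {z}   // splat a word load, zero-masked
// Note from the defm arguments above: only the D/Q variants are marked
// convertible to three-address form.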
1283
1284multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1285                                      SDPatternOperator OpNode,
1286                                      X86VectorVTInfo _Dst,
1287                                      X86VectorVTInfo _Src> {
1288  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1289                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1290                           (_Dst.VT (OpNode addr:$src))>,
1291                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1292                           AVX5128IBase, EVEX;
1293}
1294
1295// This should be used for the AVX512DQ broadcast instructions. It disables
1296// the unmasked patterns so that we only use the DQ instructions when masking
1297// is requested.
1298multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1299                                         SDPatternOperator OpNode,
1300                                         X86VectorVTInfo _Dst,
1301                                         X86VectorVTInfo _Src> {
1302  let hasSideEffects = 0, mayLoad = 1 in
1303  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1304                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1305                           (null_frag),
1306                           (_Dst.VT (OpNode addr:$src))>,
1307                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1308                           AVX5128IBase, EVEX;
1309}
1310let Predicates = [HasBWI] in {
1311  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1312            (VPBROADCASTWZrm addr:$src)>;
1313
1314  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1315            (VPBROADCASTWZrr VR128X:$src)>;
1316  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1317            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1318}
1319let Predicates = [HasVLX, HasBWI] in {
1320  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1321            (VPBROADCASTWZ128rm addr:$src)>;
1322  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1323            (VPBROADCASTWZ256rm addr:$src)>;
1324
1325  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1326            (VPBROADCASTWZ128rr VR128X:$src)>;
1327  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1328            (VPBROADCASTWZ256rr VR128X:$src)>;
1329
1330  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1331            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1332  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1333            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1334}
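// There is no dedicated scalar-f16 broadcast instruction; the patterns above
// reuse the integer word broadcast for f16 splats, e.g. (illustrative):
//   vpbroadcastw %xmm0, %zmm1    // splat an f16 held in xmm0 to all 32 lanes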
1335
1336//===----------------------------------------------------------------------===//
1337// AVX-512 BROADCAST SUBVECTORS
1338//
1339
1340defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1341                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1342                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1343defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1344                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1345                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1346defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1347                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1348                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1349defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1350                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1351                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
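// Illustrative assembly for the subvector broadcasts above; these are
// load-only forms (there is no register-source encoding):
//   vbroadcasti32x4 (%rdi), %zmm0 {%k1} {z}   // splat a 128-bit load 4 times
//   vbroadcastf64x4 (%rsi), %zmm1             // splat a 256-bit load twice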
1352
1353let Predicates = [HasAVX512] in {
1354def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1355          (VBROADCASTF64X4rm addr:$src)>;
1356def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1357          (VBROADCASTF64X4rm addr:$src)>;
1358def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1359          (VBROADCASTF64X4rm addr:$src)>;
1360def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1361          (VBROADCASTI64X4rm addr:$src)>;
1362def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1363          (VBROADCASTI64X4rm addr:$src)>;
1364def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1365          (VBROADCASTI64X4rm addr:$src)>;
1366def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1367          (VBROADCASTI64X4rm addr:$src)>;
1368
1369def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1370          (VBROADCASTF32X4rm addr:$src)>;
1371def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1372          (VBROADCASTF32X4rm addr:$src)>;
1373def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1374          (VBROADCASTF32X4rm addr:$src)>;
1375def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1376          (VBROADCASTI32X4rm addr:$src)>;
1377def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1378          (VBROADCASTI32X4rm addr:$src)>;
1379def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1380          (VBROADCASTI32X4rm addr:$src)>;
1381def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1382          (VBROADCASTI32X4rm addr:$src)>;
1383
1384// Patterns for selects of bitcasted operations.
1385def : Pat<(vselect_mask VK16WM:$mask,
1386                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1387                        (v16f32 immAllZerosV)),
1388          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1389def : Pat<(vselect_mask VK16WM:$mask,
1390                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1391                        VR512:$src0),
1392          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1393def : Pat<(vselect_mask VK16WM:$mask,
1394                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1395                        (v16i32 immAllZerosV)),
1396          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1397def : Pat<(vselect_mask VK16WM:$mask,
1398                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1399                        VR512:$src0),
1400          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1401
1402def : Pat<(vselect_mask VK8WM:$mask,
1403                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1404                        (v8f64 immAllZerosV)),
1405          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1406def : Pat<(vselect_mask VK8WM:$mask,
1407                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1408                        VR512:$src0),
1409          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1410def : Pat<(vselect_mask VK8WM:$mask,
1411                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1412                        (v8i64 immAllZerosV)),
1413          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1414def : Pat<(vselect_mask VK8WM:$mask,
1415                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1416                        VR512:$src0),
1417          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1418}
1419
1420let Predicates = [HasVLX] in {
1421defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1422                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1423                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1424defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1425                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1426                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1427
1428def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1429          (VBROADCASTF32X4Z256rm addr:$src)>;
1430def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1431          (VBROADCASTF32X4Z256rm addr:$src)>;
1432def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1433          (VBROADCASTF32X4Z256rm addr:$src)>;
1434def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1435          (VBROADCASTI32X4Z256rm addr:$src)>;
1436def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1437          (VBROADCASTI32X4Z256rm addr:$src)>;
1438def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1439          (VBROADCASTI32X4Z256rm addr:$src)>;
1440def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1441          (VBROADCASTI32X4Z256rm addr:$src)>;
1442
1443// Patterns for selects of bitcasted operations.
1444def : Pat<(vselect_mask VK8WM:$mask,
1445                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1446                        (v8f32 immAllZerosV)),
1447          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1448def : Pat<(vselect_mask VK8WM:$mask,
1449                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1450                        VR256X:$src0),
1451          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1452def : Pat<(vselect_mask VK8WM:$mask,
1453                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1454                        (v8i32 immAllZerosV)),
1455          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1456def : Pat<(vselect_mask VK8WM:$mask,
1457                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1458                        VR256X:$src0),
1459          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1460}
1461
1462let Predicates = [HasBF16] in {
1463  def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1464            (VBROADCASTF64X4rm addr:$src)>;
1465  def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1466            (VBROADCASTF32X4rm addr:$src)>;
1467}
1468
1469let Predicates = [HasBF16, HasVLX] in
1470  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1471            (VBROADCASTF32X4Z256rm addr:$src)>;
1472
1473let Predicates = [HasVLX, HasDQI] in {
1474defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1475                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
1476                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1477defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1478                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
1479                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
1480
1481// Patterns for selects of bitcasted operations.
1482def : Pat<(vselect_mask VK4WM:$mask,
1483                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1484                        (v4f64 immAllZerosV)),
1485          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1486def : Pat<(vselect_mask VK4WM:$mask,
1487                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1488                        VR256X:$src0),
1489          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1490def : Pat<(vselect_mask VK4WM:$mask,
1491                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1492                        (v4i64 immAllZerosV)),
1493          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1494def : Pat<(vselect_mask VK4WM:$mask,
1495                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1496                        VR256X:$src0),
1497          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1498}
1499
1500let Predicates = [HasDQI] in {
1501defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1502                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1503                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1504defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1505                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1506                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1507defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1508                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1509                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1510defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1511                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1512                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1513
1514// Patterns for selects of bitcasted operations.
1515def : Pat<(vselect_mask VK16WM:$mask,
1516                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1517                        (v16f32 immAllZerosV)),
1518          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1519def : Pat<(vselect_mask VK16WM:$mask,
1520                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1521                        VR512:$src0),
1522          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1523def : Pat<(vselect_mask VK16WM:$mask,
1524                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1525                        (v16i32 immAllZerosV)),
1526          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1527def : Pat<(vselect_mask VK16WM:$mask,
1528                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1529                        VR512:$src0),
1530          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1531
1532def : Pat<(vselect_mask VK8WM:$mask,
1533                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1534                        (v8f64 immAllZerosV)),
1535          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1536def : Pat<(vselect_mask VK8WM:$mask,
1537                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1538                        VR512:$src0),
1539          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1540def : Pat<(vselect_mask VK8WM:$mask,
1541                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1542                        (v8i64 immAllZerosV)),
1543          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1544def : Pat<(vselect_mask VK8WM:$mask,
1545                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1546                        VR512:$src0),
1547          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1548}
1549
1550multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1551                                        AVX512VLVectorVTInfo _Dst,
1552                                        AVX512VLVectorVTInfo _Src> {
1553  let Predicates = [HasDQI] in
1554    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1555                                          WriteShuffle256Ld, _Dst.info512,
1556                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1557                                          EVEX_V512;
1558  let Predicates = [HasDQI, HasVLX] in
1559    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1560                                          WriteShuffle256Ld, _Dst.info256,
1561                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1562                                          EVEX_V256;
1563}
1564
1565multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1566                                         AVX512VLVectorVTInfo _Dst,
1567                                         AVX512VLVectorVTInfo _Src> :
1568  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1569
1570  let Predicates = [HasDQI, HasVLX] in
1571    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1572                                          WriteShuffleXLd, _Dst.info128,
1573                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1574                                          EVEX_V128;
1575}
1576
1577defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1578                                          avx512vl_i32_info, avx512vl_i64_info>;
1579defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1580                                          avx512vl_f32_info, avx512vl_f64_info>;
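// Illustrative forms for the 32x2 broadcasts above (the low two dwords of the
// source are splat to every 64-bit lane); register choices are arbitrary:
//   vbroadcasti32x2 %xmm0, %ymm1
//   vbroadcastf32x2 (%rdi), %zmm2 {%k1}
// Only the integer variant instantiates a 128-bit (Z128) form, matching
// avx512_common_broadcast_i32x2 vs. avx512_common_broadcast_32x2 above.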
1581
1582//===----------------------------------------------------------------------===//
1583// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1584//---
1585multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1586                                  X86VectorVTInfo _, RegisterClass KRC> {
1587  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1588                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1589                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1590                  EVEX, Sched<[WriteShuffle]>;
1591}
1592
1593multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1594                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1595  let Predicates = [HasCDI] in
1596    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1597  let Predicates = [HasCDI, HasVLX] in {
1598    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1599    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1600  }
1601}
1602
1603defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1604                                               avx512vl_i32_info, VK16>;
1605defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1606                                               avx512vl_i64_info, VK8>, REX_W;
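// Illustrative forms for the mask-to-vector broadcasts above (CDI):
//   vpbroadcastmw2d %k1, %zmm0   // zero-extended 16-bit mask to every dword
//   vpbroadcastmb2q %k2, %ymm1   // zero-extended 8-bit mask to every qword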
1607
1608//===----------------------------------------------------------------------===//
1609// -- VPERMI2 - 3 source operands form --
1610multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1611                         X86FoldableSchedWrite sched,
1612                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1613let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1614    hasSideEffects = 0 in {
1615  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1616          (ins _.RC:$src2, _.RC:$src3),
1617          OpcodeStr, "$src3, $src2", "$src2, $src3",
1618          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1619          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1620
1621  let mayLoad = 1 in
1622  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1623            (ins _.RC:$src2, _.MemOp:$src3),
1624            OpcodeStr, "$src3, $src2", "$src2, $src3",
1625            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1626                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1627            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1628  }
1629}
1630
1631multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1632                            X86FoldableSchedWrite sched,
1633                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1634  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1635      hasSideEffects = 0, mayLoad = 1 in
1636  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1637              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1638              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1639              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1640              (_.VT (X86VPermt2 _.RC:$src2,
1641               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1642              AVX5128IBase, EVEX, VVVV, EVEX_B,
1643              Sched<[sched.Folded, sched.ReadAfterFold]>;
1644}
1645
1646multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1647                               X86FoldableSchedWrite sched,
1648                               AVX512VLVectorVTInfo VTInfo,
1649                               AVX512VLVectorVTInfo ShuffleMask> {
1650  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1651                             ShuffleMask.info512>,
1652               avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1653                                ShuffleMask.info512>, EVEX_V512;
1654  let Predicates = [HasVLX] in {
1655  defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1656                                ShuffleMask.info128>,
1657                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1658                                   ShuffleMask.info128>, EVEX_V128;
1659  defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1660                                ShuffleMask.info256>,
1661                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1662                                   ShuffleMask.info256>, EVEX_V256;
1663  }
1664}
1665
1666multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1667                                  X86FoldableSchedWrite sched,
1668                                  AVX512VLVectorVTInfo VTInfo,
1669                                  AVX512VLVectorVTInfo Idx,
1670                                  Predicate Prd> {
1671  let Predicates = [Prd] in
1672  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1673                             Idx.info512>, EVEX_V512;
1674  let Predicates = [Prd, HasVLX] in {
1675  defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1676                                Idx.info128>, EVEX_V128;
1677  defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1678                                Idx.info256>,  EVEX_V256;
1679  }
1680}
1681
1682defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1683                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1684defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1685                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1686defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1687                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1688                  REX_W, EVEX_CD8<16, CD8VF>;
1689defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1690                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1691                  EVEX_CD8<8, CD8VF>;
1692defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1693                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1694defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1695                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
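// Illustrative use of the VPERMI2* forms above (AT&T syntax). The destination
// register doubles as the index operand and is overwritten with the result:
//   vpermi2d %zmm3, %zmm2, %zmm1
// Each dword of zmm1 indexes into the 32-entry table formed by zmm2 and zmm3,
// and the selected dwords replace the indices in zmm1.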
1696
1697// Extra patterns to handle the additional bitcasts that arise because the
1698// passthru and index have different types in the FP versions.
1699multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1700                                  X86VectorVTInfo IdxVT,
1701                                  X86VectorVTInfo CastVT> {
1702  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1703                                (X86VPermt2 (_.VT _.RC:$src2),
1704                                            (IdxVT.VT (bitconvert
1705                                                       (CastVT.VT _.RC:$src1))),
1706                                            _.RC:$src3),
1707                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1708            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1709                                                _.RC:$src2, _.RC:$src3)>;
1710  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1711                                (X86VPermt2 _.RC:$src2,
1712                                            (IdxVT.VT (bitconvert
1713                                                       (CastVT.VT _.RC:$src1))),
1714                                            (_.LdFrag addr:$src3)),
1715                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1716            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1717                                                _.RC:$src2, addr:$src3)>;
1718  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1719                                 (X86VPermt2 _.RC:$src2,
1720                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1721                                             (_.BroadcastLdFrag addr:$src3)),
1722                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1723            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1724                                                 _.RC:$src2, addr:$src3)>;
1725}
1726
1727// TODO: Should we add more casts? The vXi64 case is common due to ABI.
1728defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
1729defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
1730defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;
1731
1732// VPERMT2
1733multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1734                         X86FoldableSchedWrite sched,
1735                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1736let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1737  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1738          (ins IdxVT.RC:$src2, _.RC:$src3),
1739          OpcodeStr, "$src3, $src2", "$src2, $src3",
1740          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1741          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;
1742
1743  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1744            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1745            OpcodeStr, "$src3, $src2", "$src2, $src3",
1746            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1747                   (_.LdFrag addr:$src3))), 1>,
1748            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1749  }
1750}
1751multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1752                            X86FoldableSchedWrite sched,
1753                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1754  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1755  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1756              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1757              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1758              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1759              (_.VT (X86VPermt2 _.RC:$src1,
1760               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1761              AVX5128IBase, EVEX, VVVV, EVEX_B,
1762              Sched<[sched.Folded, sched.ReadAfterFold]>;
1763}
1764
1765multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1766                               X86FoldableSchedWrite sched,
1767                               AVX512VLVectorVTInfo VTInfo,
1768                               AVX512VLVectorVTInfo ShuffleMask> {
1769  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1770                             ShuffleMask.info512>,
1771               avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1772                                ShuffleMask.info512>, EVEX_V512;
1773  let Predicates = [HasVLX] in {
1774  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1775                                ShuffleMask.info128>,
1776                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1777                                   ShuffleMask.info128>, EVEX_V128;
1778  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1779                                ShuffleMask.info256>,
1780                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1781                                    ShuffleMask.info256>, EVEX_V256;
1782  }
1783}
1784
1785multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1786                                  X86FoldableSchedWrite sched,
1787                                  AVX512VLVectorVTInfo VTInfo,
1788                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1789  let Predicates = [Prd] in
1790  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1791                             Idx.info512>, EVEX_V512;
1792  let Predicates = [Prd, HasVLX] in {
1793  defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1794                                Idx.info128>, EVEX_V128;
1795  defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1796                                Idx.info256>, EVEX_V256;
1797  }
1798}
1799
1800defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1801                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1802defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1803                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1804defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1805                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1806                  REX_W, EVEX_CD8<16, CD8VF>;
1807defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1808                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1809                  EVEX_CD8<8, CD8VF>;
1810defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1811                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1812defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1813                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
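// VPERMT2* performs the same table lookup as VPERMI2*, but the tied operand
// supplies data rather than indices: the index vector is a separate source
// and the first data source (tied to $dst) is overwritten. Illustrative form:
//   vpermt2d %zmm3, %zmm2, %zmm1   // indices in zmm2, table is {zmm1, zmm3}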
1814
1815//===----------------------------------------------------------------------===//
1816// AVX-512 - BLEND using mask
1817//
1818
1819multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1820                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1821  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1822  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1823             (ins _.RC:$src1, _.RC:$src2),
1824             !strconcat(OpcodeStr,
1825             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1826             EVEX, VVVV, Sched<[sched]>;
1827  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1828             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1829             !strconcat(OpcodeStr,
1830             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1831             []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
1832  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1833             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1834             !strconcat(OpcodeStr,
1835             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1836             []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
1837  let mayLoad = 1 in {
1838  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839             (ins _.RC:$src1, _.MemOp:$src2),
1840             !strconcat(OpcodeStr,
1841             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1842             []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
1843             Sched<[sched.Folded, sched.ReadAfterFold]>;
1844  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846             !strconcat(OpcodeStr,
1847             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1848             []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1849             Sched<[sched.Folded, sched.ReadAfterFold]>;
1850  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1851             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1852             !strconcat(OpcodeStr,
1853             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1854             []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1855             Sched<[sched.Folded, sched.ReadAfterFold]>;
1856  }
1857  }
1858}
1859multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1860                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1861  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1862  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1863      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1864       !strconcat(OpcodeStr,
1865            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1866            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1867      EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1868      Sched<[sched.Folded, sched.ReadAfterFold]>;
1869
1870  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1871      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1872       !strconcat(OpcodeStr,
1873            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1874            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1875      EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1876      Sched<[sched.Folded, sched.ReadAfterFold]>;
1877
1878  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1879      (ins _.RC:$src1, _.ScalarMemOp:$src2),
1880       !strconcat(OpcodeStr,
1881            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1882            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1883      EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1884      Sched<[sched.Folded, sched.ReadAfterFold]>;
1885  }
1886}
1887
1888multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1889                        AVX512VLVectorVTInfo VTInfo> {
1890  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1891           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1892                                 EVEX_V512;
1893
1894  let Predicates = [HasVLX] in {
1895    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1896                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1897                                      EVEX_V256;
1898    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1899                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1900                                      EVEX_V128;
1901  }
1902}
1903
1904multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1905                        AVX512VLVectorVTInfo VTInfo> {
1906  let Predicates = [HasBWI] in
1907    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
1908                               EVEX_V512;
1909
1910  let Predicates = [HasBWI, HasVLX] in {
1911    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
1912                                  EVEX_V256;
1913    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
1914                                  EVEX_V128;
1915  }
1916}
1917
1918defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
1919                              avx512vl_f32_info>;
1920defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
1921                              avx512vl_f64_info>, REX_W;
1922defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
1923                              avx512vl_i32_info>;
1924defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
1925                              avx512vl_i64_info>, REX_W;
1926defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
1927                              avx512vl_i8_info>;
1928defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
1929                              avx512vl_i16_info>, REX_W;
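// Illustrative use of the blend-with-mask forms above: each result element is
// taken from the second source where the mask bit is set, and from the first
// source (or zero, with {z}) otherwise:
//   vblendmps %zmm2, %zmm1, %zmm0 {%k1}           // zmm0[i] = k1[i] ? zmm2[i] : zmm1[i]
//   vpblendmd (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}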
1930
1931//===----------------------------------------------------------------------===//
1932// Compare Instructions
1933//===----------------------------------------------------------------------===//
1934
1935// avx512_cmp_scalar - AVX512 CMPSS, CMPSD and CMPSH
1936
1937multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
1938                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
1939                             X86FoldableSchedWrite sched> {
1940  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1941                      (outs _.KRC:$dst),
1942                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1943                      "vcmp"#_.Suffix,
1944                      "$cc, $src2, $src1", "$src1, $src2, $cc",
1945                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1946                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1947                                 timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1948  let mayLoad = 1 in
1949  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1950                    (outs _.KRC:$dst),
1951                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
1952                    "vcmp"#_.Suffix,
1953                    "$cc, $src2, $src1", "$src1, $src2, $cc",
1954                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1955                        timm:$cc),
1956                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
1957                        timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1958                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1959
1960  let Uses = [MXCSR] in
1961  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1962                     (outs _.KRC:$dst),
1963                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1964                     "vcmp"#_.Suffix,
1965                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
1966                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1967                                timm:$cc),
1968                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1969                                   timm:$cc)>,
1970                     EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
1971
1972  let isCodeGenOnly = 1 in {
1973    let isCommutable = 1 in
1974    def rr : AVX512Ii8<0xC2, MRMSrcReg,
1975                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
1976                !strconcat("vcmp", _.Suffix,
1977                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1978                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1979                                          _.FRC:$src2,
1980                                          timm:$cc))]>,
1981                EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
1982    def rm : AVX512Ii8<0xC2, MRMSrcMem,
1983              (outs _.KRC:$dst),
1984              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1985              !strconcat("vcmp", _.Suffix,
1986                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1987              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1988                                        (_.ScalarLdFrag addr:$src2),
1989                                        timm:$cc))]>,
1990              EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
1991              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1992  }
1993}
1994
1995let Predicates = [HasAVX512] in {
1996  let ExeDomain = SSEPackedSingle in
1997  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
1998                                   X86cmpms_su, X86cmpmsSAE_su,
1999                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2000  let ExeDomain = SSEPackedDouble in
2001  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2002                                   X86cmpms_su, X86cmpmsSAE_su,
2003                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
2004}
2005let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2006  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2007                                   X86cmpms_su, X86cmpmsSAE_su,
2008                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
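// E.g. "vcmpss k1 {k2}, xmm1, xmm2, 0" compares the low single-precision
// elements and writes a single mask bit; the rrb_Int form adds {sae} to
// suppress exception reporting during the comparison.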
2009
2010multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2011                              X86FoldableSchedWrite sched,
2012                              X86VectorVTInfo _, bit IsCommutable> {
2013  let isCommutable = IsCommutable, hasSideEffects = 0 in
2014  def rr : AVX512BI<opc, MRMSrcReg,
2015             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2016             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2017             []>, EVEX, VVVV, Sched<[sched]>;
2018  let mayLoad = 1, hasSideEffects = 0 in
2019  def rm : AVX512BI<opc, MRMSrcMem,
2020             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2021             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2022             []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2023  let isCommutable = IsCommutable, hasSideEffects = 0 in
2024  def rrk : AVX512BI<opc, MRMSrcReg,
2025              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2026              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2027                          "$dst {${mask}}, $src1, $src2}"),
2028              []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
2029  let mayLoad = 1, hasSideEffects = 0 in
2030  def rmk : AVX512BI<opc, MRMSrcMem,
2031              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2032              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2033                          "$dst {${mask}}, $src1, $src2}"),
2034              []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2035}
2036
2037multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2038                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2039                                  bit IsCommutable> :
2040           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2041  let mayLoad = 1, hasSideEffects = 0 in {
2042  def rmb : AVX512BI<opc, MRMSrcMem,
2043              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2044              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2045                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2046              []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2047  def rmbk : AVX512BI<opc, MRMSrcMem,
2048               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2049                                       _.ScalarMemOp:$src2),
2050               !strconcat(OpcodeStr,
2051                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2052                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2053               []>, EVEX, VVVV, EVEX_K, EVEX_B,
2054               Sched<[sched.Folded, sched.ReadAfterFold]>;
2055  }
2056}
2057
2058multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2059                                 X86SchedWriteWidths sched,
2060                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2061                                 bit IsCommutable = 0> {
2062  let Predicates = [prd] in
2063  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2064                              VTInfo.info512, IsCommutable>, EVEX_V512;
2065
2066  let Predicates = [prd, HasVLX] in {
2067    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2068                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2069    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2070                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2071  }
2072}
2073
2074multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2075                                     X86SchedWriteWidths sched,
2076                                     AVX512VLVectorVTInfo VTInfo,
2077                                     Predicate prd, bit IsCommutable = 0> {
2078  let Predicates = [prd] in
2079  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2080                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2081
2082  let Predicates = [prd, HasVLX] in {
2083    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2084                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2085    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2086                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2087  }
2088}
2089
2090// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2091// increase the pattern complexity the way an immediate would.
2092let AddedComplexity = 2 in {
2093// FIXME: Is there a better scheduler class for VPCMP?
2094defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2095                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2096                EVEX_CD8<8, CD8VF>, WIG;
2097
2098defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2099                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2100                EVEX_CD8<16, CD8VF>, WIG;
2101
2102defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2103                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2104                EVEX_CD8<32, CD8VF>;
2105
2106defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2107                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2108                T8, REX_W, EVEX_CD8<64, CD8VF>;
2109
2110defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2111                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2112                EVEX_CD8<8, CD8VF>, WIG;
2113
2114defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2115                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2116                EVEX_CD8<16, CD8VF>, WIG;
2117
2118defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2119                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2120                EVEX_CD8<32, CD8VF>;
2121
2122defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2123                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2124                T8, REX_W, EVEX_CD8<64, CD8VF>;
2125}
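// In their EVEX-encoded forms these reuse the legacy PCMPEQ/PCMPGT opcodes but
// write a mask register rather than a vector of all-ones/all-zeros elements,
// e.g. "vpcmpeqd k1 {k2}, zmm0, zmm1".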
2126
2127multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2128                          PatFrag Frag_su,
2129                          X86FoldableSchedWrite sched,
2130                          X86VectorVTInfo _, string Name> {
2131  let isCommutable = 1 in
2132  def rri : AVX512AIi8<opc, MRMSrcReg,
2133             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2134             !strconcat("vpcmp", Suffix,
2135                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2136             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2137                                                (_.VT _.RC:$src2),
2138                                                cond)))]>,
2139             EVEX, VVVV, Sched<[sched]>;
2140  def rmi : AVX512AIi8<opc, MRMSrcMem,
2141             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2142             !strconcat("vpcmp", Suffix,
2143                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2144             [(set _.KRC:$dst, (_.KVT
2145                                (Frag:$cc
2146                                 (_.VT _.RC:$src1),
2147                                 (_.VT (_.LdFrag addr:$src2)),
2148                                 cond)))]>,
2149             EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
2150  let isCommutable = 1 in
2151  def rrik : AVX512AIi8<opc, MRMSrcReg,
2152              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2153                                      u8imm:$cc),
2154              !strconcat("vpcmp", Suffix,
2155                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2156                         "$dst {${mask}}, $src1, $src2, $cc}"),
2157              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2158                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2159                                                         (_.VT _.RC:$src2),
2160                                                         cond))))]>,
2161              EVEX, VVVV, EVEX_K, Sched<[sched]>;
2162  def rmik : AVX512AIi8<opc, MRMSrcMem,
2163              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2164                                    u8imm:$cc),
2165              !strconcat("vpcmp", Suffix,
2166                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2167                         "$dst {${mask}}, $src1, $src2, $cc}"),
2168              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2169                                     (_.KVT
2170                                      (Frag_su:$cc
2171                                       (_.VT _.RC:$src1),
2172                                       (_.VT (_.LdFrag addr:$src2)),
2173                                       cond))))]>,
2174              EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2175
2176  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2177                             (_.VT _.RC:$src1), cond)),
2178            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2179             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2180
2181  def : Pat<(and _.KRCWM:$mask,
2182                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2183                                     (_.VT _.RC:$src1), cond))),
2184            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2185             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2186             (X86pcmpm_imm_commute $cc))>;
2187}
2188
2189multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2190                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2191                              X86VectorVTInfo _, string Name> :
2192           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2193  def rmib : AVX512AIi8<opc, MRMSrcMem,
2194             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2195                                     u8imm:$cc),
2196             !strconcat("vpcmp", Suffix,
2197                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2198                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2199             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2200                                       (_.VT _.RC:$src1),
2201                                       (_.BroadcastLdFrag addr:$src2),
2202                                       cond)))]>,
2203             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2204  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2205              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2206                                       _.ScalarMemOp:$src2, u8imm:$cc),
2207              !strconcat("vpcmp", Suffix,
2208                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2209                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2210              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2211                                     (_.KVT (Frag_su:$cc
2212                                             (_.VT _.RC:$src1),
2213                                             (_.BroadcastLdFrag addr:$src2),
2214                                             cond))))]>,
2215              EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2216
2217  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2218                    (_.VT _.RC:$src1), cond)),
2219            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2220             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2221
2222  def : Pat<(and _.KRCWM:$mask,
2223                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2224                                     (_.VT _.RC:$src1), cond))),
2225            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2226             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2227             (X86pcmpm_imm_commute $cc))>;
2228}
2229
2230multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2231                             PatFrag Frag_su, X86SchedWriteWidths sched,
2232                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2233  let Predicates = [prd] in
2234  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2235                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2236
2237  let Predicates = [prd, HasVLX] in {
2238    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2239                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2240    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2241                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2242  }
2243}
2244
2245multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2246                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2247                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2248  let Predicates = [prd] in
2249  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2250                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2251
2252  let Predicates = [prd, HasVLX] in {
2253    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2254                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2255    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2256                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2257  }
2258}
2259
2260// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2261defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2262                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2263                                EVEX_CD8<8, CD8VF>;
2264defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2265                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2266                                 EVEX_CD8<8, CD8VF>;
2267
2268defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2269                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2270                                REX_W, EVEX_CD8<16, CD8VF>;
2271defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2272                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2273                                 REX_W, EVEX_CD8<16, CD8VF>;
2274
2275defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2276                                    SchedWriteVecALU, avx512vl_i32_info,
2277                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2278defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2279                                     SchedWriteVecALU, avx512vl_i32_info,
2280                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2281
2282defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2283                                    SchedWriteVecALU, avx512vl_i64_info,
2284                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2285defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2286                                     SchedWriteVecALU, avx512vl_i64_info,
2287                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
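// The immediate selects the predicate, e.g. for VPCMPD/VPCMPUD: 0=EQ, 1=LT,
// 2=LE, 4=NEQ, 5=NLT (>=), 6=NLE (>).  So "vpcmpd k1, zmm0, zmm1, 1" sets
// k1[i] when zmm0[i] < zmm1[i] (signed); vpcmpud is the unsigned variant.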
2288
2289multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2290                              string Name> {
2291let Uses = [MXCSR], mayRaiseFPException = 1 in {
2292  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2293                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2294                   "vcmp"#_.Suffix,
2295                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2296                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2297                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2298                   1>, Sched<[sched]>;
2299
2300  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2301                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2302                "vcmp"#_.Suffix,
2303                "$cc, $src2, $src1", "$src1, $src2, $cc",
2304                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2305                             timm:$cc),
2306                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2307                            timm:$cc)>,
2308                Sched<[sched.Folded, sched.ReadAfterFold]>;
2309
2310  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2311                (outs _.KRC:$dst),
2312                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2313                "vcmp"#_.Suffix,
2314                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2315                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2316                (X86any_cmpm (_.VT _.RC:$src1),
2317                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2318                             timm:$cc),
2319                (X86cmpm_su (_.VT _.RC:$src1),
2320                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2321                            timm:$cc)>,
2322                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2323  }
2324
2325  // Patterns for selecting compares with the load in the other operand.
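  // For example, a compare with the load on the left-hand side is matched by
  // folding the memory operand into $src2 and commuting the immediate
  // (e.g. LT becomes GT) via X86cmpm_imm_commute.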
2326  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2327                         timm:$cc),
2328            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2329                                                      (X86cmpm_imm_commute timm:$cc))>;
2330
2331  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2332                                            (_.VT _.RC:$src1),
2333                                            timm:$cc)),
2334            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2335                                                       _.RC:$src1, addr:$src2,
2336                                                       (X86cmpm_imm_commute timm:$cc))>;
2337
2338  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2339                         (_.VT _.RC:$src1), timm:$cc),
2340            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2341                                                       (X86cmpm_imm_commute timm:$cc))>;
2342
2343  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2344                                            (_.VT _.RC:$src1),
2345                                            timm:$cc)),
2346            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2347                                                        _.RC:$src1, addr:$src2,
2348                                                        (X86cmpm_imm_commute timm:$cc))>;
2349
2350  // Patterns for mask intrinsics.
2351  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2352                      (_.KVT immAllOnesV)),
2353            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2354
2355  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2356            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2357                                                       _.RC:$src2, timm:$cc)>;
2358
2359  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2360                      (_.KVT immAllOnesV)),
2361            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2362
2363  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2364                      _.KRCWM:$mask),
2365            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2366                                                       addr:$src2, timm:$cc)>;
2367
2368  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2369                      (_.KVT immAllOnesV)),
2370            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2371
2372  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2373                      _.KRCWM:$mask),
2374            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2375                                                        addr:$src2, timm:$cc)>;
2376
2377  // Patterns for mask intrinsics with the load in the other operand.
2378  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2379                      (_.KVT immAllOnesV)),
2380            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2381                                                      (X86cmpm_imm_commute timm:$cc))>;
2382
2383  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2384                      _.KRCWM:$mask),
2385            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2386                                                       _.RC:$src1, addr:$src2,
2387                                                       (X86cmpm_imm_commute timm:$cc))>;
2388
2389  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2390                      (_.KVT immAllOnesV)),
2391            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2392                                                       (X86cmpm_imm_commute timm:$cc))>;
2393
2394  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2395                      _.KRCWM:$mask),
2396            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2397                                                        _.RC:$src1, addr:$src2,
2398                                                        (X86cmpm_imm_commute  timm:$cc))>;
2399}
2400
2401multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2402  // Comparison code form (VCMP[EQ/LT/LE/...]).
2403  let Uses = [MXCSR] in
2404  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2405                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2406                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2407                     "vcmp"#_.Suffix,
2408                     "$cc, {sae}, $src2, $src1",
2409                     "$src1, $src2, {sae}, $cc",
2410                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2411                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2412                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2413                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2414                     EVEX_B, Sched<[sched]>;
2415}
2416
2417multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2418                       Predicate Pred = HasAVX512> {
2419  let Predicates = [Pred] in {
2420    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2421                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2422
2423  }
2424  let Predicates = [Pred,HasVLX] in {
2425   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2426   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2427  }
2428}
2429
2430defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2431                          AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
2432defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2433                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
2434defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2435                          AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;
2436
2437// Patterns to select fp compares with a load as the first operand.
2438let Predicates = [HasAVX512] in {
2439  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2440            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2441
2442  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2443            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2444}
2445
2446let Predicates = [HasFP16] in {
2447  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2448            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2449}
2450
2451//===----------------------------------------------------------------------===//
2452// FPClass
2453//===----------------------------------------------------------------------===//
2454// Handle the scalar fpclass instruction: mask = fpclass(reg_scalar, imm)
2455//                                               fpclass(mem_scalar, imm)
2456multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2457                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2458                                 Predicate prd> {
2459  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2460      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2461                      (ins _.RC:$src1, i32u8imm:$src2),
2462                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2463                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2464                              (i32 timm:$src2)))]>,
2465                      Sched<[sched]>;
2466      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2467                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2468                      OpcodeStr#_.Suffix#
2469                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2470                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2471                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2472                                      (i32 timm:$src2))))]>,
2473                      EVEX_K, Sched<[sched]>;
2474    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2475                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2476                    OpcodeStr#_.Suffix#
2477                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2478                    [(set _.KRC:$dst,
2479                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2480                                        (i32 timm:$src2)))]>,
2481                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2482    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2483                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2484                    OpcodeStr#_.Suffix#
2485                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2486                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2487                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2488                            (i32 timm:$src2))))]>,
2489                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2490  }
2491}
2492
2493// Handle the packed fpclass instruction: mask = fpclass(reg_vec, imm)
2494//                                               fpclass(mem_vec, imm)
2495//                                               fpclass(broadcast(eltVt), imm)
2496multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2497                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2498                                 string mem>{
2499  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2500  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2501                      (ins _.RC:$src1, i32u8imm:$src2),
2502                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2503                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2504                                       (i32 timm:$src2)))]>,
2505                      Sched<[sched]>;
2506  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2507                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2508                      OpcodeStr#_.Suffix#
2509                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2510                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2511                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2512                                       (i32 timm:$src2))))]>,
2513                      EVEX_K, Sched<[sched]>;
2514  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2515                    (ins _.MemOp:$src1, i32u8imm:$src2),
2516                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2517                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2518                    [(set _.KRC:$dst,(X86Vfpclass
2519                                     (_.VT (_.LdFrag addr:$src1)),
2520                                     (i32 timm:$src2)))]>,
2521                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2522  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2523                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2524                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2525                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2526                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2527                                  (_.VT (_.LdFrag addr:$src1)),
2528                                  (i32 timm:$src2))))]>,
2529                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2530  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2531                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2532                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2533                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2534                                                  #_.BroadcastStr#", $src2}",
2535                    [(set _.KRC:$dst,(X86Vfpclass
2536                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2537                                     (i32 timm:$src2)))]>,
2538                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2539  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2540                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2541                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2542                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2543                                                   _.BroadcastStr#", $src2}",
2544                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2545                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2546                                     (i32 timm:$src2))))]>,
2547                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2548  }
2549
2550  // Allow the register and broadcast forms to be written with the x, y, z
2551  // suffix that is otherwise only needed to disambiguate the memory form.
2552  def : InstAlias<OpcodeStr#_.Suffix#mem#
2553                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2554                  (!cast<Instruction>(NAME#"rr")
2555                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2556  def : InstAlias<OpcodeStr#_.Suffix#mem#
2557                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2558                  (!cast<Instruction>(NAME#"rrk")
2559                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2560  def : InstAlias<OpcodeStr#_.Suffix#mem#
2561                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2562                  _.BroadcastStr#", $src2}",
2563                  (!cast<Instruction>(NAME#"rmb")
2564                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2565  def : InstAlias<OpcodeStr#_.Suffix#mem#
2566                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2567                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2568                  (!cast<Instruction>(NAME#"rmbk")
2569                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2570}
2571
2572multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2573                                     bits<8> opc, X86SchedWriteWidths sched,
2574                                     Predicate prd>{
2575  let Predicates = [prd] in {
2576    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2577                                      _.info512, "z">, EVEX_V512;
2578  }
2579  let Predicates = [prd, HasVLX] in {
2580    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2581                                      _.info128, "x">, EVEX_V128;
2582    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2583                                      _.info256, "y">, EVEX_V256;
2584  }
2585}
2586
2587multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2588                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2589  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2590                                      sched, HasFP16>,
2591                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2592  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2593                                   sched.Scl, f16x_info, HasFP16>,
2594                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2595  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2596                                      sched, HasDQI>,
2597                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2598  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2599                                      sched, HasDQI>,
2600                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2601  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2602                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2603                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2604  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2605                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2606                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
2607}
2608
2609defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
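// The fpclass immediate is a bit mask of the categories to test for; e.g.
// bit 0 is QNaN and bit 7 is SNaN, so "vfpclassps k1, zmm0, 0x81" sets k1[i]
// for every element that is any kind of NaN.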
2610
2611//===----------------------------------------------------------------------===//
2612// Mask register copy, including
2613// - copy between mask registers
2614// - load/store mask registers
2615// - copy from GPR to mask register and vice versa
2616//===----------------------------------------------------------------------===//
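// E.g. "kmovw k1, k2" (kk), "kmovw k1, word ptr [rdi]" (km),
// "kmovw word ptr [rdi], k1" (mk), "kmovw k1, eax" (kr) and "kmovw eax, k1"
// (rk).  KMOVB additionally requires DQI and KMOVD/KMOVQ require BWI, as the
// predicates below reflect.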
2617multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2618                          string OpcodeStr, RegisterClass KRC, ValueType vvt,
2619                          X86MemOperand x86memop, string Suffix = ""> {
2620  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
2621      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
2622  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2623                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2624                  Sched<[WriteMove]>;
2625  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2626                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2627                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
2628                  Sched<[WriteLoad]>;
2629  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2630                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2631                    [(store KRC:$src, addr:$dst)]>,
2632                  Sched<[WriteStore]>;
2633}
2634
2635multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2636                               string OpcodeStr, RegisterClass KRC,
2637                               RegisterClass GRC, string Suffix = ""> {
2638  let hasSideEffects = 0 in {
2639    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2640                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2641                    Sched<[WriteMove]>;
2642    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2643                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2644                    Sched<[WriteMove]>;
2645  }
2646}
2647
2648let Predicates = [HasDQI, NoEGPR] in
2649  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2650               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2651               VEX, TB, PD;
2652let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2653  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2654               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2655               EVEX, TB, PD;
2656
2657let Predicates = [HasAVX512, NoEGPR] in
2658  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2659               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2660               VEX, TB;
2661let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2662  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2663               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2664               EVEX, TB;
2665
2666let Predicates = [HasBWI, NoEGPR] in {
2667  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2668               VEX, TB, PD, REX_W;
2669  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2670               VEX, TB, XD;
2671  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2672               VEX, TB, REX_W;
2673  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2674               VEX, TB, XD, REX_W;
2675}
2676let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2677  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2678               EVEX, TB, PD, REX_W;
2679  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2680               EVEX, TB, XD;
2681  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2682               EVEX, TB, REX_W;
2683  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2684               EVEX, TB, XD, REX_W;
2685}
2686
2687// GR from/to mask register
2688def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2689          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2690def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2691          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2692def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2693          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2694
2695def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2696          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2697def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2698          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2699
2700def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2701          (KMOVWrk VK16:$src)>;
2702def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2703          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2704def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2705          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2706def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2707          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2708
2709def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2710          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2711def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2712          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2713def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2714          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2715def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2716          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2717
2718def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2719          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2720def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2721          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2722def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2723          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2724def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2725          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2726
2727// Load/store kreg
2728let Predicates = [HasDQI] in {
2729  def : Pat<(v1i1 (load addr:$src)),
2730            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2731  def : Pat<(v2i1 (load addr:$src)),
2732            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2733  def : Pat<(v4i1 (load addr:$src)),
2734            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2735}
2736
2737let Predicates = [HasAVX512] in {
2738  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2739            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2740  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2741            (KMOVWkm addr:$src)>;
2742}
2743
2744def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2745                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2746                                              SDTCVecEltisVT<1, i1>,
2747                                              SDTCisPtrTy<2>]>>;
2748
2749let Predicates = [HasAVX512] in {
2750  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2751    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2752              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2753
2754    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2755              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2756
2757    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2758              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2759
2760    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2761              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2762  }
2763
2764  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2765  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2766  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2767  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2768  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2769  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2770  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2771
2772  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2773                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2774            (KMOVWkr (AND32ri
2775                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2776                      (i32 1)))>;
2777}
2778
2779// Mask unary operation
2780// - KNOT
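// E.g. "knotw k1, k2" writes the bitwise complement of all 16 bits of k2 to k1.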
2781multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2782                            RegisterClass KRC, SDPatternOperator OpNode,
2783                            X86FoldableSchedWrite sched, Predicate prd> {
2784  let Predicates = [prd] in
2785    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2786               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2787               [(set KRC:$dst, (OpNode KRC:$src))]>,
2788               Sched<[sched]>;
2789}
2790
2791multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2792                                SDPatternOperator OpNode,
2793                                X86FoldableSchedWrite sched> {
2794  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2795                            sched, HasDQI>, VEX, TB, PD;
2796  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2797                            sched, HasAVX512>, VEX, TB;
2798  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2799                            sched, HasBWI>, VEX, TB, PD, REX_W;
2800  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2801                            sched, HasBWI>, VEX, TB, REX_W;
2802}
2803
2804// TODO - do we need an X86SchedWriteWidths::KMASK type?
2805defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2806
2807// KNL does not support KMOVB, so an 8-bit mask is promoted to 16 bits.
2808let Predicates = [HasAVX512, NoDQI] in
2809def : Pat<(vnot VK8:$src),
2810          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2811
2812def : Pat<(vnot VK4:$src),
2813          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2814def : Pat<(vnot VK2:$src),
2815          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2816def : Pat<(vnot VK1:$src),
2817          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2818
2819// Mask binary operation
2820// - KAND, KANDN, KOR, KXNOR, KXOR
2821multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2822                           RegisterClass KRC, SDPatternOperator OpNode,
2823                           X86FoldableSchedWrite sched, Predicate prd,
2824                           bit IsCommutable> {
2825  let Predicates = [prd], isCommutable = IsCommutable in
2826    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2827               !strconcat(OpcodeStr,
2828                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2829               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2830               Sched<[sched]>;
2831}
2832
2833multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2834                                 SDPatternOperator OpNode,
2835                                 X86FoldableSchedWrite sched, bit IsCommutable,
2836                                 Predicate prdW = HasAVX512> {
2837  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2838                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
2839  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2840                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
2841  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2842                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
2843  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2844                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
2845}
2846
2847// TODO - do we need an X86SchedWriteWidths::KMASK type?
2848defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
2849defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
2850defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
2851defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
2852defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
2853defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
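// E.g. "kandnw k1, k2, k3" computes (~k2) & k3 and "kxnorw k1, k2, k3"
// computes ~(k2 ^ k3).  KADDW requires DQI rather than baseline AVX512F,
// which is why avx512_mask_binop_all takes the prdW parameter.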
2854
2855multiclass avx512_binop_pat<SDPatternOperator VOpNode,
2856                            Instruction Inst> {
2857  // With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with DQI,
2858  // the 8-bit type is legal and the KxxxB instructions are used directly.
2859  let Predicates = [NoDQI] in
2860  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2861            (COPY_TO_REGCLASS
2862              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2863                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2864
2865  // All types smaller than 8 bits require conversion anyway
2866  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
2867        (COPY_TO_REGCLASS (Inst
2868                           (COPY_TO_REGCLASS VK1:$src1, VK16),
2869                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
2870  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2871        (COPY_TO_REGCLASS (Inst
2872                           (COPY_TO_REGCLASS VK2:$src1, VK16),
2873                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2874  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2875        (COPY_TO_REGCLASS (Inst
2876                           (COPY_TO_REGCLASS VK4:$src1, VK16),
2877                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2878}
2879
2880defm : avx512_binop_pat<and,   KANDWrr>;
2881defm : avx512_binop_pat<vandn, KANDNWrr>;
2882defm : avx512_binop_pat<or,    KORWrr>;
2883defm : avx512_binop_pat<vxnor, KXNORWrr>;
2884defm : avx512_binop_pat<xor,   KXORWrr>;
2885
2886// Mask unpacking
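// E.g. "kunpckbw k1, k2, k3" forms k1[7:0] = k3[7:0] and k1[15:8] = k2[7:0],
// which is why the concat_vectors pattern below swaps the two sources.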
2887multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
2888                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
2889                             Predicate prd> {
2890  let Predicates = [prd] in {
2891    let hasSideEffects = 0 in
2892    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
2893               (ins Src.KRC:$src1, Src.KRC:$src2),
2894               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2895               VEX, VVVV, VEX_L, Sched<[sched]>;
2896
2897    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
2898              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
2899  }
2900}
2901
2902defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;
2903defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
2904defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;
2905
2906// Mask bit testing
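// E.g. "kortestw k1, k2" ORs the two masks and sets ZF if the result is all
// zeros and CF if it is all ones, without writing a destination mask.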
2907multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2908                              SDNode OpNode, X86FoldableSchedWrite sched,
2909                              Predicate prd> {
2910  let Predicates = [prd], Defs = [EFLAGS] in
2911    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2912               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2913               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
2914               Sched<[sched]>;
2915}
2916
2917multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2918                                X86FoldableSchedWrite sched,
2919                                Predicate prdW = HasAVX512> {
2920  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
2921                                                                VEX, TB, PD;
2922  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
2923                                                                VEX, TB;
2924  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
2925                                                                VEX, TB, REX_W;
2926  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
2927                                                                VEX, TB, PD, REX_W;
2928}
2929
2930// TODO - do we need an X86SchedWriteWidths::KMASK type?
2931defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
2932defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
2933
2934// Mask shift
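// E.g. "kshiftlw k1, k2, 4" shifts the 16-bit mask left by four bit positions,
// filling with zeros; an oversized shift count simply produces zero.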
2935multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2936                               SDNode OpNode, X86FoldableSchedWrite sched> {
2937  let Predicates = [HasAVX512] in
2938    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2939                 !strconcat(OpcodeStr,
2940                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2941                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
2942                 Sched<[sched]>;
2943}
2944
2945multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2946                                 SDNode OpNode, X86FoldableSchedWrite sched> {
2947  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2948                               sched>, VEX, TA, PD, REX_W;
2949  let Predicates = [HasDQI] in
2950  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2951                               sched>, VEX, TA, PD;
2952  let Predicates = [HasBWI] in {
2953  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2954                               sched>, VEX, TA, PD, REX_W;
2955  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2956                               sched>, VEX, TA, PD;
2957  }
2958}
2959
2960defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
2961defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
2962
2963// Patterns for comparing 128/256-bit integer vectors using 512-bit instructions.
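// Without VLX, the 128/256-bit sources are inserted into the low part of an
// implicit-def 512-bit register, the ZMM form of the compare is used, and the
// low bits of the resulting mask are taken.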
2964multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2965                                                 string InstStr,
2966                                                 X86VectorVTInfo Narrow,
2967                                                 X86VectorVTInfo Wide> {
2968def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2969                                (Narrow.VT Narrow.RC:$src2), cond)),
2970          (COPY_TO_REGCLASS
2971           (!cast<Instruction>(InstStr#"Zrri")
2972            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2973            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2974            (X86pcmpm_imm $cc)), Narrow.KRC)>;
2975
2976def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
2977                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
2978                                                    (Narrow.VT Narrow.RC:$src2),
2979                                                    cond)))),
2980          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
2981           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
2982           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2983           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
2984           (X86pcmpm_imm $cc)), Narrow.KRC)>;
2985}
2986
2987multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
2988                                                     string InstStr,
2989                                                     X86VectorVTInfo Narrow,
2990                                                     X86VectorVTInfo Wide> {
2991// Broadcast load.
2992def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
2993                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
2994          (COPY_TO_REGCLASS
2995           (!cast<Instruction>(InstStr#"Zrmib")
2996            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
2997            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
2998
2999def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3000                           (Narrow.KVT
3001                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3002                                         (Narrow.BroadcastLdFrag addr:$src2),
3003                                         cond)))),
3004          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3005           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3006           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3007           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3008
3009// Commuted with broadcast load.
3010def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3011                                (Narrow.VT Narrow.RC:$src1),
3012                                cond)),
3013          (COPY_TO_REGCLASS
3014           (!cast<Instruction>(InstStr#"Zrmib")
3015            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3016            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3017
3018def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3019                           (Narrow.KVT
3020                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3021                                         (Narrow.VT Narrow.RC:$src1),
3022                                         cond)))),
3023          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3024           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3025           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3026           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3027}
3028
3029// Same as above, but for fp types, which don't use PatFrags.
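// Unlike the integer case, the condition code is already a plain immediate
// (timm:$cc), so no PatFrag is needed to capture it; the commuted
// broadcast-load patterns remap the immediate with X86cmpm_imm_commute so
// that swapping the operands leaves the comparison result unchanged.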
3030multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3031                                                X86VectorVTInfo Narrow,
3032                                                X86VectorVTInfo Wide> {
3033def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3034                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3035          (COPY_TO_REGCLASS
3036           (!cast<Instruction>(InstStr#"Zrri")
3037            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3038            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3039            timm:$cc), Narrow.KRC)>;
3040
3041def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3042                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3043                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3044          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3045           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3046           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3047           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3048           timm:$cc), Narrow.KRC)>;
3049
3050// Broadcast load.
3051def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3052                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3053          (COPY_TO_REGCLASS
3054           (!cast<Instruction>(InstStr#"Zrmbi")
3055            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3056            addr:$src2, timm:$cc), Narrow.KRC)>;
3057
3058def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3059                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3060                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3061          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3062           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3063           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3064           addr:$src2, timm:$cc), Narrow.KRC)>;
3065
3066// Commuted with broadcast load.
3067def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3068                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3069          (COPY_TO_REGCLASS
3070           (!cast<Instruction>(InstStr#"Zrmbi")
3071            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3072            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3073
3074def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3075                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3076                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3077          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3078           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3079           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3080           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3081}
3082
3083let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3084  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3085  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3086
3087  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3088  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3089
3090  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3091  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3092
3093  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3094  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3095
3096  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3097  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3098
3099  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3100  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3101
3102  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3103  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3104
3105  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3106  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3107
3108  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3109  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3110  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3111  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3112}
3113
3114let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3115  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3116  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3117
3118  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3119  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3120
3121  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3122  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3123
3124  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3125  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3126}
3127
3128// Mask setting all 0s or 1s
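// These are pseudos; after register allocation they are expected to be
// expanded (in X86InstrInfo's post-RA pseudo expansion) into a mask register
// KXOR'd with itself for all-zeros, or KXNOR'd with itself for all-ones,
// which is why they are marked rematerializable and as cheap as a move.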
3129multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3130  let Predicates = [HasAVX512] in
3131    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3132        SchedRW = [WriteZero] in
3133      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3134                     [(set KRC:$dst, (VT Val))]>;
3135}
3136
3137multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3138  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3139  defm D : avx512_mask_setop<VK32, v32i1, Val>;
3140  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3141}
3142
3143defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3144defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3145
3146// With AVX-512 only, an 8-bit mask is promoted to a 16-bit mask.
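// For example, a v8i1 all-zeros constant is materialized with the 16-bit
// KSET0W pseudo and the result is simply re-classed to VK8, as the patterns
// below show.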
3147let Predicates = [HasAVX512] in {
3148  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3149  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3150  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3151  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3152  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3153  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3154  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3155  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3156}
3157
3158// Patterns for kmask insert_subvector/extract_subvector to/from index=0
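// The narrow mask types occupy the low bits of the wider k-register, so an
// index-0 insert or extract needs no instruction at all; e.g. extracting
// v8i1 from v16i1 is just a COPY_TO_REGCLASS to VK8.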
3159multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3160                                             RegisterClass RC, ValueType VT> {
3161  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3162            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3163
3164  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3165            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3166}
3167defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3168defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3169defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3170defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3171defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3172defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3173
3174defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3175defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3176defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3177defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3178defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3179
3180defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3181defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3182defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3183defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3184
3185defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3186defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3187defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3188
3189defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3190defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3191
3192defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3193
3194//===----------------------------------------------------------------------===//
3195// AVX-512 - Aligned and unaligned load and store
3196//
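// avx512_load below generates, per vector width: an unmasked register move
// (rr), zero- and merge-masked register moves (rrkz/rrk), an unmasked load
// (rm), merge- and zero-masked loads (rmk/rmkz), and patterns mapping
// masked-load nodes onto the masked forms. avx512_store generates the
// store-form register moves (*_REV), the unmasked and merge-masked stores
// (mr/mrk), a masked_store pattern, and the ".s" assembler aliases for the
// register-to-register encodings.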
3197
3198multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3199                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3200                       X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
3201                       SDPatternOperator SelectOprr = vselect> {
3202  let hasSideEffects = 0 in {
3203  let isMoveReg = 1 in
3204  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3205                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3206                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
3207  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3208                      (ins _.KRCWM:$mask,  _.RC:$src),
3209                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3210                       "${dst} {${mask}} {z}, $src}"),
3211                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3212                                           (_.VT _.RC:$src),
3213                                           _.ImmAllZerosV)))], _.ExeDomain>,
3214                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3215
3216  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3217  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3218                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3219                    !if(NoRMPattern, [],
3220                        [(set _.RC:$dst,
3221                          (_.VT (ld_frag addr:$src)))]),
3222                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>;
3223
3224  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3225    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3226                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3227                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3228                      "${dst} {${mask}}, $src1}"),
3229                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3230                                          (_.VT _.RC:$src1),
3231                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3232                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3233    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3234                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3235                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3236                      "${dst} {${mask}}, $src1}"),
3237                     [(set _.RC:$dst, (_.VT
3238                         (vselect_mask _.KRCWM:$mask,
3239                          (_.VT (ld_frag addr:$src1)),
3240                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3241                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3242  }
3243  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3244                  (ins _.KRCWM:$mask, _.MemOp:$src),
3245                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3246                                "${dst} {${mask}} {z}, $src}",
3247                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3248                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3249                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3250  }
3251  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3252            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3253
3254  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3255            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3256
3257  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3258            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3259             _.KRCWM:$mask, addr:$ptr)>;
3260}
3261
3262multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3263                                 AVX512VLVectorVTInfo _, Predicate prd,
3264                                 X86SchedWriteMoveLSWidths Sched,
3265                                 bit NoRMPattern = 0> {
3266  let Predicates = [prd] in
3267  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3268                       _.info512.AlignedLdFrag, masked_load_aligned,
3269                       Sched.ZMM, NoRMPattern>, EVEX_V512;
3270
3271  let Predicates = [prd, HasVLX] in {
3272  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3273                          _.info256.AlignedLdFrag, masked_load_aligned,
3274                          Sched.YMM, NoRMPattern>, EVEX_V256;
3275  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3276                          _.info128.AlignedLdFrag, masked_load_aligned,
3277                          Sched.XMM, NoRMPattern>, EVEX_V128;
3278  }
3279}
3280
3281multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3282                          AVX512VLVectorVTInfo _, Predicate prd,
3283                          X86SchedWriteMoveLSWidths Sched,
3284                          bit NoRMPattern = 0,
3285                          SDPatternOperator SelectOprr = vselect> {
3286  let Predicates = [prd] in
3287  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3288                       masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;
3289
3290  let Predicates = [prd, HasVLX] in {
3291  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3292                         masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
3293  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3294                         masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
3295  }
3296}
3297
3298multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3299                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3300                        X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
3301  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3302  let isMoveReg = 1 in
3303  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3304                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3305                         [], _.ExeDomain>, EVEX,
3306                         Sched<[Sched.RR]>;
3307  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3308                         (ins _.KRCWM:$mask, _.RC:$src),
3309                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3310                         "${dst} {${mask}}, $src}",
3311                         [], _.ExeDomain>,  EVEX, EVEX_K,
3312                         Sched<[Sched.RR]>;
3313  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3314                          (ins _.KRCWM:$mask, _.RC:$src),
3315                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3316                          "${dst} {${mask}} {z}, $src}",
3317                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3318                          Sched<[Sched.RR]>;
3319  }
3320
3321  let hasSideEffects = 0, mayStore = 1 in
3322  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3323                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3324                    !if(NoMRPattern, [],
3325                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3326                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
3327  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3328                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3329              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3330               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3331
3332  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3333           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3334                                                        _.KRCWM:$mask, _.RC:$src)>;
3335
3336  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3337                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3338                   _.RC:$dst, _.RC:$src), 0>;
3339  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3340                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3341                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3342  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3343                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3344                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3345}
3346
3347multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3348                            AVX512VLVectorVTInfo _, Predicate prd,
3349                            X86SchedWriteMoveLSWidths Sched,
3350                            bit NoMRPattern = 0> {
3351  let Predicates = [prd] in
3352  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3353                        masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
3354  let Predicates = [prd, HasVLX] in {
3355    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3356                             masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
3357    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3358                             masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
3359  }
3360}
3361
3362multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3363                                  AVX512VLVectorVTInfo _, Predicate prd,
3364                                  X86SchedWriteMoveLSWidths Sched,
3365                                  bit NoMRPattern = 0> {
3366  let Predicates = [prd] in
3367  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3368                        masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;
3369
3370  let Predicates = [prd, HasVLX] in {
3371    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3372                             masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
3373    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3374                             masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
3375  }
3376}
3377
3378defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3379                                     HasAVX512, SchedWriteFMoveLS>,
3380               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3381                                      HasAVX512, SchedWriteFMoveLS>,
3382               TB, EVEX_CD8<32, CD8VF>;
3383
3384defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3385                                     HasAVX512, SchedWriteFMoveLS>,
3386               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3387                                      HasAVX512, SchedWriteFMoveLS>,
3388               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3389
3390defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3391                              SchedWriteFMoveLS, 0, null_frag>,
3392               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3393                               SchedWriteFMoveLS>,
3394                               TB, EVEX_CD8<32, CD8VF>;
3395
3396defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3397                              SchedWriteFMoveLS, 0, null_frag>,
3398               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3399                               SchedWriteFMoveLS>,
3400               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3401
3402defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3403                                       HasAVX512, SchedWriteVecMoveLS, 1>,
3404                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3405                                        HasAVX512, SchedWriteVecMoveLS, 1>,
3406                 TB, PD, EVEX_CD8<32, CD8VF>;
3407
3408defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3409                                       HasAVX512, SchedWriteVecMoveLS>,
3410                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3411                                        HasAVX512, SchedWriteVecMoveLS>,
3412                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
3413
3414defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3415                               SchedWriteVecMoveLS, 1>,
3416                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3417                                SchedWriteVecMoveLS, 1>,
3418                TB, XD, EVEX_CD8<8, CD8VF>;
3419
3420defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3421                                SchedWriteVecMoveLS, 1>,
3422                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3423                                 SchedWriteVecMoveLS, 1>,
3424                 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;
3425
3426defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3427                                SchedWriteVecMoveLS, 1, null_frag>,
3428                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3429                                 SchedWriteVecMoveLS, 1>,
3430                 TB, XS, EVEX_CD8<32, CD8VF>;
3431
3432defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3433                                SchedWriteVecMoveLS, 0, null_frag>,
3434                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3435                                 SchedWriteVecMoveLS>,
3436                 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;
3437
3438// Special instructions to help with spilling when we don't have VLX. We need
3439// to load or store from a ZMM register instead. These are converted in
3440// expandPostRAPseudos.
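// Roughly, the expansion is expected to pick a plain VEX-encoded 128/256-bit
// move when the register is one of the low 16, and otherwise an EVEX
// operation on the containing ZMM register (e.g. a 128/256-bit broadcast
// load or a VEXTRACT*x4 store), so no VLX encoding is ever required.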
3441let isReMaterializable = 1, canFoldAsLoad = 1,
3442    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3443def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3444                            "", []>, Sched<[WriteFLoadX]>;
3445def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3446                            "", []>, Sched<[WriteFLoadY]>;
3447def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3448                            "", []>, Sched<[WriteFLoadX]>;
3449def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3450                            "", []>, Sched<[WriteFLoadY]>;
3451}
3452
3453let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3454def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3455                            "", []>, Sched<[WriteFStoreX]>;
3456def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3457                            "", []>, Sched<[WriteFStoreY]>;
3458def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3459                            "", []>, Sched<[WriteFStoreX]>;
3460def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3461                            "", []>, Sched<[WriteFStoreY]>;
3462}
3463
3464def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3465                          (v8i64 VR512:$src))),
3466   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3467                                              VK8), VR512:$src)>;
3468
3469def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3470                           (v16i32 VR512:$src))),
3471                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3472
3473// These patterns exist to prevent the above patterns from introducing a second
3474// mask inversion when one already exists.
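// e.g. for (vselect (vnot %k), zeros, %x) the generic pattern above would
// emit a fresh KNOT even though %k is already the inverted mask; matching
// the vnot directly lets the existing inversion be reused.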
3475def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3476                          (v8i64 immAllZerosV),
3477                          (v8i64 VR512:$src))),
3478                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3479def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3480                           (v16i32 immAllZerosV),
3481                           (v16i32 VR512:$src))),
3482                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3483
3484multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3485                              X86VectorVTInfo Wide> {
3486 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3487                               Narrow.RC:$src1, Narrow.RC:$src0)),
3488           (EXTRACT_SUBREG
3489            (Wide.VT
3490             (!cast<Instruction>(InstrStr#"rrk")
3491              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3492              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3493              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3494            Narrow.SubRegIdx)>;
3495
3496 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3497                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3498           (EXTRACT_SUBREG
3499            (Wide.VT
3500             (!cast<Instruction>(InstrStr#"rrkz")
3501              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3502              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3503            Narrow.SubRegIdx)>;
3504}
3505
3506// Patterns for handling 128/256-bit vector selects when VLX isn't available:
3507// use a 512-bit operation and extract the narrow result.
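// For example (sketch only): a v8f32 vselect under a v8i1 mask becomes a
// masked 512-bit move: both vector operands are inserted into ZMM registers,
// the mask is widened to VK16, VMOVAPSZrrk is emitted, and the low 256 bits
// of the result are extracted.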
3508let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
3509  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3510  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3511  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3512  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3513
3514  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3515  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3516  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3517  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3518}
3519
3520let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
3521  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3522  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3523
3524  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3525  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3526
3527  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3528  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3529
3530  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3531  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
3532}
3533
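// Loads and stores of the integer and f16/bf16 vector types that did not get
// a pattern above are mapped to one canonical whole-register move per
// domain: element size does not matter for a full-register move, so
// VMOVDQA64/VMOVDQU64 cover the integer types and VMOVAPS/VMOVUPS cover the
// f16/bf16 vectors.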
3534let Predicates = [HasAVX512] in {
3535  // 512-bit load.
3536  def : Pat<(alignedloadv16i32 addr:$src),
3537            (VMOVDQA64Zrm addr:$src)>;
3538  def : Pat<(alignedloadv32i16 addr:$src),
3539            (VMOVDQA64Zrm addr:$src)>;
3540  def : Pat<(alignedloadv32f16 addr:$src),
3541            (VMOVAPSZrm addr:$src)>;
3542  def : Pat<(alignedloadv32bf16 addr:$src),
3543            (VMOVAPSZrm addr:$src)>;
3544  def : Pat<(alignedloadv64i8 addr:$src),
3545            (VMOVDQA64Zrm addr:$src)>;
3546  def : Pat<(loadv16i32 addr:$src),
3547            (VMOVDQU64Zrm addr:$src)>;
3548  def : Pat<(loadv32i16 addr:$src),
3549            (VMOVDQU64Zrm addr:$src)>;
3550  def : Pat<(loadv32f16 addr:$src),
3551            (VMOVUPSZrm addr:$src)>;
3552  def : Pat<(loadv32bf16 addr:$src),
3553            (VMOVUPSZrm addr:$src)>;
3554  def : Pat<(loadv64i8 addr:$src),
3555            (VMOVDQU64Zrm addr:$src)>;
3556
3557  // 512-bit store.
3558  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3559            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3560  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3561            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3562  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3563            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3564  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3565            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3566  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3567            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3568  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3569            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3570  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3571            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3572  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3573            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3574  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3575            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3576  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3577            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3578}
3579
3580let Predicates = [HasVLX] in {
3581  // 128-bit load.
3582  def : Pat<(alignedloadv4i32 addr:$src),
3583            (VMOVDQA64Z128rm addr:$src)>;
3584  def : Pat<(alignedloadv8i16 addr:$src),
3585            (VMOVDQA64Z128rm addr:$src)>;
3586  def : Pat<(alignedloadv8f16 addr:$src),
3587            (VMOVAPSZ128rm addr:$src)>;
3588  def : Pat<(alignedloadv8bf16 addr:$src),
3589            (VMOVAPSZ128rm addr:$src)>;
3590  def : Pat<(alignedloadv16i8 addr:$src),
3591            (VMOVDQA64Z128rm addr:$src)>;
3592  def : Pat<(loadv4i32 addr:$src),
3593            (VMOVDQU64Z128rm addr:$src)>;
3594  def : Pat<(loadv8i16 addr:$src),
3595            (VMOVDQU64Z128rm addr:$src)>;
3596  def : Pat<(loadv8f16 addr:$src),
3597            (VMOVUPSZ128rm addr:$src)>;
3598  def : Pat<(loadv8bf16 addr:$src),
3599            (VMOVUPSZ128rm addr:$src)>;
3600  def : Pat<(loadv16i8 addr:$src),
3601            (VMOVDQU64Z128rm addr:$src)>;
3602
3603  // 128-bit store.
3604  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3605            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3606  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3607            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3608  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3609            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3610  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3611            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3612  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3613            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3614  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3615            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3616  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3617            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3618  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3619            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3620  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3621            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3622  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3623            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3624
3625  // 256-bit load.
3626  def : Pat<(alignedloadv8i32 addr:$src),
3627            (VMOVDQA64Z256rm addr:$src)>;
3628  def : Pat<(alignedloadv16i16 addr:$src),
3629            (VMOVDQA64Z256rm addr:$src)>;
3630  def : Pat<(alignedloadv16f16 addr:$src),
3631            (VMOVAPSZ256rm addr:$src)>;
3632  def : Pat<(alignedloadv16bf16 addr:$src),
3633            (VMOVAPSZ256rm addr:$src)>;
3634  def : Pat<(alignedloadv32i8 addr:$src),
3635            (VMOVDQA64Z256rm addr:$src)>;
3636  def : Pat<(loadv8i32 addr:$src),
3637            (VMOVDQU64Z256rm addr:$src)>;
3638  def : Pat<(loadv16i16 addr:$src),
3639            (VMOVDQU64Z256rm addr:$src)>;
3640  def : Pat<(loadv16f16 addr:$src),
3641            (VMOVUPSZ256rm addr:$src)>;
3642  def : Pat<(loadv16bf16 addr:$src),
3643            (VMOVUPSZ256rm addr:$src)>;
3644  def : Pat<(loadv32i8 addr:$src),
3645            (VMOVDQU64Z256rm addr:$src)>;
3646
3647  // 256-bit store.
3648  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3649            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3650  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3651            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3652  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3653            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3654  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3655            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3656  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3657            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3658  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3659            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3660  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3661            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3662  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3663            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3664  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3665            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3666  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3667            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3668}
3669
3670multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
3671let Predicates = [HasBWI] in {
3672  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3673            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3674  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3675            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3676  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3677                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3678            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3679  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3680                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3681            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3682  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3683                     (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3684            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3685  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3686                     (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3687            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3688  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3689            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3690  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3691            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3692  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3693            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3694
3695  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3696            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3697}
3698let Predicates = [HasBWI, HasVLX] in {
3699  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3700            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3701  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3702            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3703  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3704                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3705            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3706  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3707                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3708            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3709  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3710                     (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3711            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3712  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3713                     (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3714            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3715  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3716            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3717  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3718            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3719  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3720            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3721
3722  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3723            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3724
3725  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3726            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3727  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3728            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3729  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3730                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3731            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3732  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3733                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3734            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3735  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3736                     (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3737            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3738  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3739                     (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3740            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3741  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3742            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3743  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3744            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3745  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3746            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3747
3748  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3749            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3750}
3751}
3752
3753defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3754defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3755
3756// Move Int Doubleword to Packed Double Int
3757//
3758let ExeDomain = SSEPackedInt in {
3759def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3760                      "vmovd\t{$src, $dst|$dst, $src}",
3761                      [(set VR128X:$dst,
3762                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3763                        EVEX, Sched<[WriteVecMoveFromGpr]>;
3764def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3765                      "vmovd\t{$src, $dst|$dst, $src}",
3766                      [(set VR128X:$dst,
3767                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3768                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3769def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3770                      "vmovq\t{$src, $dst|$dst, $src}",
3771                        [(set VR128X:$dst,
3772                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3773                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3774let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3775def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3776                      (ins i64mem:$src),
3777                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3778                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3779let isCodeGenOnly = 1 in {
3780def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3781                       "vmovq\t{$src, $dst|$dst, $src}",
3782                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3783                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3784def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3785                         "vmovq\t{$src, $dst|$dst, $src}",
3786                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3787                         EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
3788}
3789} // ExeDomain = SSEPackedInt
3790
3791// Move Int Doubleword to Single Scalar
3792//
3793let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3794def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3795                      "vmovd\t{$src, $dst|$dst, $src}",
3796                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3797                      EVEX, Sched<[WriteVecMoveFromGpr]>;
3798} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3799
3800// Move doubleword from xmm register to r/m32
3801//
3802let ExeDomain = SSEPackedInt in {
3803def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3804                       "vmovd\t{$src, $dst|$dst, $src}",
3805                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3806                                        (iPTR 0)))]>,
3807                       EVEX, Sched<[WriteVecMoveToGpr]>;
3808def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3809                       (ins i32mem:$dst, VR128X:$src),
3810                       "vmovd\t{$src, $dst|$dst, $src}",
3811                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3812                                     (iPTR 0))), addr:$dst)]>,
3813                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3814} // ExeDomain = SSEPackedInt
3815
3816// Move quadword from xmm register to r/m64
3817//
3818let ExeDomain = SSEPackedInt in {
3819def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3820                      "vmovq\t{$src, $dst|$dst, $src}",
3821                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3822                                                   (iPTR 0)))]>,
3823                      TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
3824                      Requires<[HasAVX512]>;
3825
3826let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3827def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3828                      "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
3829                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
3830                      Requires<[HasAVX512, In64BitMode]>;
3831
3832def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3833                      (ins i64mem:$dst, VR128X:$src),
3834                      "vmovq\t{$src, $dst|$dst, $src}",
3835                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3836                              addr:$dst)]>,
3837                      EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
3838                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3839
3840let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3841def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3842                             (ins VR128X:$src),
3843                             "vmovq\t{$src, $dst|$dst, $src}", []>,
3844                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
3845} // ExeDomain = SSEPackedInt
3846
3847def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3848                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3849
3850let Predicates = [HasAVX512] in {
3851  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3852            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3853}
3854
3855// Move Scalar Single to Double Int
3856//
3857let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3858def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3859                      (ins FR32X:$src),
3860                      "vmovd\t{$src, $dst|$dst, $src}",
3861                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3862                      EVEX, Sched<[WriteVecMoveToGpr]>;
3863} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3864
3865// Move Quadword Int to Packed Quadword Int
3866//
3867let ExeDomain = SSEPackedInt in {
3868def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3869                      (ins i64mem:$src),
3870                      "vmovq\t{$src, $dst|$dst, $src}",
3871                      [(set VR128X:$dst,
3872                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3873                      EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3874} // ExeDomain = SSEPackedInt
3875
3876// Allow "vmovd" but print "vmovq".
3877def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3878                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3879def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3880                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3881
3882// Conversions between masks and scalar fp.
3883def : Pat<(v32i1 (bitconvert FR32X:$src)),
3884          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3885def : Pat<(f32 (bitconvert VK32:$src)),
3886          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3887
3888def : Pat<(v64i1 (bitconvert FR64X:$src)),
3889          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3890def : Pat<(f64 (bitconvert VK64:$src)),
3891          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3892
3893//===----------------------------------------------------------------------===//
3894// AVX-512  MOVSH, MOVSS, MOVSD
3895//===----------------------------------------------------------------------===//
3896
3897multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3898                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
3899  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
3900  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3901             (ins _.RC:$src1, _.RC:$src2),
3902             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3903             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3904             _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
3905  let Predicates = [prd] in {
3906  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3907              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3908              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3909              "$dst {${mask}} {z}, $src1, $src2}"),
3910              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3911                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3912                                      _.ImmAllZerosV)))],
3913              _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3914  let Constraints = "$src0 = $dst"  in
3915  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3916             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3917             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3918             "$dst {${mask}}, $src1, $src2}"),
3919             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3920                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3921                                     (_.VT _.RC:$src0))))],
3922             _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3923  let canFoldAsLoad = 1, isReMaterializable = 1 in {
3924  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3925             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3926             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3927             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3928  // The _alt version uses the scalar register class (_.FRC) instead of _.RC.
3929  let isCodeGenOnly = 1 in
3930  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3931                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3932                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3933                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3934  }
3935  let mayLoad = 1, hasSideEffects = 0 in {
3936    let Constraints = "$src0 = $dst" in
3937    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3938               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3939               !strconcat(asm, "\t{$src, $dst {${mask}}|",
3940               "$dst {${mask}}, $src}"),
3941               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3942    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3943               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3944               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3945               "$dst {${mask}} {z}, $src}"),
3946               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3947  }
3948  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3949             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3950             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3951             EVEX, Sched<[WriteFStore]>;
3952  let mayStore = 1, hasSideEffects = 0 in
3953  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3954              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3955              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3956              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
3957  }
3958}
3959
3960defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3961                                  VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;
3962
3963defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3964                                  VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
3965
3966defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
3967                                  HasFP16>,
3968                                  VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
3969
3970multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3971                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
3972
3973def : Pat<(_.VT (OpNode _.RC:$src0,
3974                        (_.VT (scalar_to_vector
3975                                  (_.EltVT (X86selects VK1WM:$mask,
3976                                                       (_.EltVT _.FRC:$src1),
3977                                                       (_.EltVT _.FRC:$src2))))))),
3978          (!cast<Instruction>(InstrStr#rrk)
3979                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
3980                        VK1WM:$mask,
3981                        (_.VT _.RC:$src0),
3982                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3983
3984def : Pat<(_.VT (OpNode _.RC:$src0,
3985                        (_.VT (scalar_to_vector
3986                                  (_.EltVT (X86selects VK1WM:$mask,
3987                                                       (_.EltVT _.FRC:$src1),
3988                                                       (_.EltVT ZeroFP))))))),
3989          (!cast<Instruction>(InstrStr#rrkz)
3990                        VK1WM:$mask,
3991                        (_.VT _.RC:$src0),
3992                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
3993}
3994
3995multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3996                                        dag Mask, RegisterClass MaskRC> {
3997
3998def : Pat<(masked_store
3999             (_.info512.VT (insert_subvector undef,
4000                               (_.info128.VT _.info128.RC:$src),
4001                               (iPTR 0))), addr:$dst, Mask),
4002          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4003                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4004                      _.info128.RC:$src)>;
4005
4006}
4007
4008multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4009                                               AVX512VLVectorVTInfo _,
4010                                               dag Mask, RegisterClass MaskRC,
4011                                               SubRegIndex subreg> {
4012
4013def : Pat<(masked_store
4014             (_.info512.VT (insert_subvector undef,
4015                               (_.info128.VT _.info128.RC:$src),
4016                               (iPTR 0))), addr:$dst, Mask),
4017          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4018                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4019                      _.info128.RC:$src)>;
4020
4021}
4022
4023// This matches the more recent codegen from clang that avoids emitting a
4024// 512-bit masked store directly. Codegen will widen a 128-bit masked store
4025// to 512 bits on AVX512F-only targets.
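// For example (illustrative; the exact IR shape depends on the front end), an
// intrinsic such as _mm_mask_store_ss becomes a 128-bit masked store whose
// value is widened with insert_subvector into undef and whose mask is widened
// with zeros. The AVX512F pattern below folds that back to the masked scalar
// store (mrk) instruction, while the AVX512VL pattern matches the unwidened
// 128-bit form directly.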
4026multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4027                                               AVX512VLVectorVTInfo _,
4028                                               dag Mask512, dag Mask128,
4029                                               RegisterClass MaskRC,
4030                                               SubRegIndex subreg> {
4031
4032// AVX512F pattern.
4033def : Pat<(masked_store
4034             (_.info512.VT (insert_subvector undef,
4035                               (_.info128.VT _.info128.RC:$src),
4036                               (iPTR 0))), addr:$dst, Mask512),
4037          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4038                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4039                      _.info128.RC:$src)>;
4040
4041// AVX512VL pattern.
4042def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4043          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4044                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4045                      _.info128.RC:$src)>;
4046}
4047
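// Masked scalar loads: the rmkz pattern is used when the masked-load
// passthrough is zero (ImmAllZerosV); the rmk pattern is used when the
// passthrough is the zero-extended scalar already live in $src (X86vzmovl).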
4048multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4049                                       dag Mask, RegisterClass MaskRC> {
4050
4051def : Pat<(_.info128.VT (extract_subvector
4052                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4053                                        _.info512.ImmAllZerosV)),
4054                           (iPTR 0))),
4055          (!cast<Instruction>(InstrStr#rmkz)
4056                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4057                      addr:$srcAddr)>;
4058
4059def : Pat<(_.info128.VT (extract_subvector
4060                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4061                      (_.info512.VT (insert_subvector undef,
4062                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4063                            (iPTR 0))))),
4064                (iPTR 0))),
4065          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4066                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4067                      addr:$srcAddr)>;
4068
4069}
4070
4071multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4072                                              AVX512VLVectorVTInfo _,
4073                                              dag Mask, RegisterClass MaskRC,
4074                                              SubRegIndex subreg> {
4075
4076def : Pat<(_.info128.VT (extract_subvector
4077                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4078                                        _.info512.ImmAllZerosV)),
4079                           (iPTR 0))),
4080          (!cast<Instruction>(InstrStr#rmkz)
4081                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4082                      addr:$srcAddr)>;
4083
4084def : Pat<(_.info128.VT (extract_subvector
4085                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4086                      (_.info512.VT (insert_subvector undef,
4087                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4088                            (iPTR 0))))),
4089                (iPTR 0))),
4090          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4091                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4092                      addr:$srcAddr)>;
4093
4094}
4095
4096// This matches the more recent codegen from clang that avoids emitting a
4097// 512-bit masked load directly. Codegen will widen a 128-bit masked load
4098// to 512 bits on AVX512F-only targets.
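// As with the stores above, the AVX512F patterns match the masked load widened
// to 512 bits, and the AVX512VL patterns match the native 128-bit masked load.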
4099multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4100                                              AVX512VLVectorVTInfo _,
4101                                              dag Mask512, dag Mask128,
4102                                              RegisterClass MaskRC,
4103                                              SubRegIndex subreg> {
4104// AVX512F patterns.
4105def : Pat<(_.info128.VT (extract_subvector
4106                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4107                                        _.info512.ImmAllZerosV)),
4108                           (iPTR 0))),
4109          (!cast<Instruction>(InstrStr#rmkz)
4110                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4111                      addr:$srcAddr)>;
4112
4113def : Pat<(_.info128.VT (extract_subvector
4114                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4115                      (_.info512.VT (insert_subvector undef,
4116                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4117                            (iPTR 0))))),
4118                (iPTR 0))),
4119          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4120                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4121                      addr:$srcAddr)>;
4122
4123// AVX512VL patterns.
4124def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4125                         _.info128.ImmAllZerosV)),
4126          (!cast<Instruction>(InstrStr#rmkz)
4127                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4128                      addr:$srcAddr)>;
4129
4130def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4131                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4132          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4133                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4134                      addr:$srcAddr)>;
4135}
4136
4137defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4138defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4139
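// In the Mask operands below only bit 0 is significant: the (and ..., 1) and
// trunc shapes describe a mask whose only possibly-set bit is bit 0, which is
// then copied into a VK1WM register for the masked scalar instruction.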
4140defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4141                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4142defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4143                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4144defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4145                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4146
4147let Predicates = [HasFP16] in {
4148defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4149defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4150                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4151defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4152                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4153defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4154                   (v32i1 (insert_subvector
4155                           (v32i1 immAllZerosV),
4156                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4157                           (iPTR 0))),
4158                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4159                   GR8, sub_8bit>;
4160
4161defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4162                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4163defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4164                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4165defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4166                   (v32i1 (insert_subvector
4167                           (v32i1 immAllZerosV),
4168                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4169                           (iPTR 0))),
4170                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4171                   GR8, sub_8bit>;
4172
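// Lower a masked select between two f16 scalars with a masked VMOVSH: copy the
// scalars into VR128X, do the masked move, and copy element 0 back to FR16X.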
4173def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4174          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4175           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4176           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4177           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4178
4179def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4180          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4181           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4182}
4183
4184defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4185                   (v16i1 (insert_subvector
4186                           (v16i1 immAllZerosV),
4187                           (v4i1 (extract_subvector
4188                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4189                                  (iPTR 0))),
4190                           (iPTR 0))),
4191                   (v4i1 (extract_subvector
4192                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4193                          (iPTR 0))), GR8, sub_8bit>;
4194defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4195                   (v8i1
4196                    (extract_subvector
4197                     (v16i1
4198                      (insert_subvector
4199                       (v16i1 immAllZerosV),
4200                       (v2i1 (extract_subvector
4201                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202                              (iPTR 0))),
4203                       (iPTR 0))),
4204                     (iPTR 0))),
4205                   (v2i1 (extract_subvector
4206                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4207                          (iPTR 0))), GR8, sub_8bit>;
4208
4209defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4210                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4211defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4212                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4213defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4214                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4215
4216defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4217                   (v16i1 (insert_subvector
4218                           (v16i1 immAllZerosV),
4219                           (v4i1 (extract_subvector
4220                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4221                                  (iPTR 0))),
4222                           (iPTR 0))),
4223                   (v4i1 (extract_subvector
4224                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4225                          (iPTR 0))), GR8, sub_8bit>;
4226defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4227                   (v8i1
4228                    (extract_subvector
4229                     (v16i1
4230                      (insert_subvector
4231                       (v16i1 immAllZerosV),
4232                       (v2i1 (extract_subvector
4233                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234                              (iPTR 0))),
4235                       (iPTR 0))),
4236                     (iPTR 0))),
4237                   (v2i1 (extract_subvector
4238                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4239                          (iPTR 0))), GR8, sub_8bit>;
4240
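// f32/f64 masked scalar selects are lowered the same way using VMOVSS/VMOVSD.
// When the selected value is a scalar load, the load is folded into the masked
// load forms (rmk/rmkz) instead.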
4241def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4242          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4243           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4244           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4245           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4246
4247def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4248          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4249           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4250
4251def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4252          (COPY_TO_REGCLASS
4253           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4254                                                       VK1WM:$mask, addr:$src)),
4255           FR32X)>;
4256def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4257          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4258
4259def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4260          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4261           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4262           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4263           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4264
4265def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4266          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4267           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4268
4269def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4270          (COPY_TO_REGCLASS
4271           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4272                                                       VK1WM:$mask, addr:$src)),
4273           FR64X)>;
4274def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4275          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4276
4277
4278def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4279          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4280def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4281          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4282
4283def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4284          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4285def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4286          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4287
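// The *_REV variants use the store-style MRMDestReg encoding of vmovsh/vmovss/
// vmovsd. They carry no patterns (isCodeGenOnly); they exist so the
// disassembler can represent that encoding and to back the ".s" aliases below.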
4288let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4289  let Predicates = [HasFP16] in {
4290    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4291        (ins VR128X:$src1, VR128X:$src2),
4292        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4293        []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4294        Sched<[SchedWriteFShuffle.XMM]>;
4295
4296    let Constraints = "$src0 = $dst" in
4297    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4298        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4299         VR128X:$src1, VR128X:$src2),
4300        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4301          "$dst {${mask}}, $src1, $src2}",
4302        []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
4303        Sched<[SchedWriteFShuffle.XMM]>;
4304
4305    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4306        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4307        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4308          "$dst {${mask}} {z}, $src1, $src2}",
4309        []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
4310        Sched<[SchedWriteFShuffle.XMM]>;
4311  }
4312  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4313                           (ins VR128X:$src1, VR128X:$src2),
4314                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4315                           []>, TB, XS, EVEX, VVVV, VEX_LIG,
4316                           Sched<[SchedWriteFShuffle.XMM]>;
4317
4318  let Constraints = "$src0 = $dst" in
4319  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4320                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4321                                                   VR128X:$src1, VR128X:$src2),
4322                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4323                                        "$dst {${mask}}, $src1, $src2}",
4324                             []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
4325                             Sched<[SchedWriteFShuffle.XMM]>;
4326
4327  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4328                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4329                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4330                                    "$dst {${mask}} {z}, $src1, $src2}",
4331                         []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
4332                         Sched<[SchedWriteFShuffle.XMM]>;
4333
4334  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4335                           (ins VR128X:$src1, VR128X:$src2),
4336                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4337                           []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
4338                           Sched<[SchedWriteFShuffle.XMM]>;
4339
4340  let Constraints = "$src0 = $dst" in
4341  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4342                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4343                                                   VR128X:$src1, VR128X:$src2),
4344                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4345                                        "$dst {${mask}}, $src1, $src2}",
4346                             []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
4347                             REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4348
4349  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4350                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4351                                                          VR128X:$src2),
4352                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4353                                         "$dst {${mask}} {z}, $src1, $src2}",
4354                              []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
4355                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4356}
4357
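// The ".s" mnemonic suffix lets assembly writers request the reversed
// (store-form) encodings defined above.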
4358def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4359                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4360def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4361                             "$dst {${mask}}, $src1, $src2}",
4362                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4363                                VR128X:$src1, VR128X:$src2), 0>;
4364def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4365                             "$dst {${mask}} {z}, $src1, $src2}",
4366                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4367                                 VR128X:$src1, VR128X:$src2), 0>;
4368def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4369                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4370def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4371                             "$dst {${mask}}, $src1, $src2}",
4372                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4373                                VR128X:$src1, VR128X:$src2), 0>;
4374def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4375                             "$dst {${mask}} {z}, $src1, $src2}",
4376                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4377                                 VR128X:$src1, VR128X:$src2), 0>;
4378def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4379                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4380def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4381                             "$dst {${mask}}, $src1, $src2}",
4382                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4383                                VR128X:$src1, VR128X:$src2), 0>;
4384def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4385                             "$dst {${mask}} {z}, $src1, $src2}",
4386                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4387                                 VR128X:$src1, VR128X:$src2), 0>;
4388
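// When optimizing for size, implement X86vzmovl (keep element 0, zero the
// remaining elements) with VMOVSS from an all-zero register; for 256/512-bit
// types this is done on the low xmm and reinserted with SUBREG_TO_REG.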
4389let Predicates = [HasAVX512, OptForSize] in {
4390  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4391            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4392  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4393            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4394
4395  // Move low f32 and clear high bits.
4396  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4397            (SUBREG_TO_REG (i32 0),
4398             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4399              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4400  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4401            (SUBREG_TO_REG (i32 0),
4402             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4403              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4404
4405  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4406            (SUBREG_TO_REG (i32 0),
4407             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4408              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4409  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4410            (SUBREG_TO_REG (i32 0),
4411             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4412              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4413}
4414
4415// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4416// VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
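// The blend immediates select only the low element from $src: bit 0 for
// VBLENDPS (one f32 lane) and bits 0-1 for VPBLENDW (two words, i.e. one i32).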
4417let Predicates = [HasAVX512, OptForSpeed] in {
4418  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4419            (SUBREG_TO_REG (i32 0),
4420             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4421                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4422                          (i8 1))), sub_xmm)>;
4423  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4424            (SUBREG_TO_REG (i32 0),
4425             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4426                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4427                          (i8 3))), sub_xmm)>;
4428}
4429
4430let Predicates = [HasAVX512] in {
4431  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4432            (VMOVSSZrm addr:$src)>;
4433  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4434            (VMOVSDZrm addr:$src)>;
4435
4436  // Represent the same patterns above but in the form they appear for
4437  // 256-bit types
4438  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4439            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4440  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4441            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4442
4443  // Represent the same patterns above but in the form they appear for
4444  // 512-bit types
4445  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4446            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4447  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4448            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4449}
4450let Predicates = [HasFP16] in {
4451  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4452            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4453  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4454            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4455
4456  // FIXME: We need better canonicalization in DAG combine.
4457  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4458            (SUBREG_TO_REG (i32 0),
4459             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4460              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4461  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4462            (SUBREG_TO_REG (i32 0),
4463             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4464              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4465
4466  // FIXME: We need better canonicalization in DAG combine.
4467  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4468            (SUBREG_TO_REG (i32 0),
4469             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4470              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4471  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4472            (SUBREG_TO_REG (i32 0),
4473             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4474              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4475
4476  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4477            (VMOVSHZrm addr:$src)>;
4478
4479  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4480            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4481
4482  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4483            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4484}
4485
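// Register-to-register vmovq: copies the low 64 bits and zeroes the upper bits
// of the destination, implementing X86vzmovl for v2i64 (and, via the patterns
// below, for v2f64 and the 256/512-bit variants).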
4486let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4487def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4488                                (ins VR128X:$src),
4489                                "vmovq\t{$src, $dst|$dst, $src}",
4490                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4491                                                   (v2i64 VR128X:$src))))]>,
4492                                EVEX, REX_W;
4493}
4494
4495let Predicates = [HasAVX512] in {
4496  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4497            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4498                                              GR8:$src, sub_8bit)))>;
4499  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4500            (VMOVDI2PDIZrr GR32:$src)>;
4501
4502  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4503            (VMOV64toPQIZrr GR64:$src)>;
4504
4505  // AVX 128-bit movd/movq instructions write zeros in the high part of the register.
4506  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4507            (VMOVDI2PDIZrm addr:$src)>;
4508  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4509            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4510  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4511            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4512  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4513            (VMOVQI2PQIZrm addr:$src)>;
4514  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4515            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4516
4517  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4518  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4519            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4520  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4521            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4522
4523  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4524            (SUBREG_TO_REG (i32 0),
4525             (v2f64 (VMOVZPQILo2PQIZrr
4526                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4527             sub_xmm)>;
4528  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4529            (SUBREG_TO_REG (i32 0),
4530             (v2i64 (VMOVZPQILo2PQIZrr
4531                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4532             sub_xmm)>;
4533
4534  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4535            (SUBREG_TO_REG (i32 0),
4536             (v2f64 (VMOVZPQILo2PQIZrr
4537                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4538             sub_xmm)>;
4539  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4540            (SUBREG_TO_REG (i32 0),
4541             (v2i64 (VMOVZPQILo2PQIZrr
4542                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4543             sub_xmm)>;
4544}
4545
4546//===----------------------------------------------------------------------===//
4547// AVX-512 - Non-temporals
4548//===----------------------------------------------------------------------===//
4549
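// Non-temporal (streaming) accesses. VMOVNTDQA is the streaming aligned load;
// the avx512_movnt multiclasses below define the streaming aligned stores.
// Only aligned non-temporal loads/stores are matched here.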
4550def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4551                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4552                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4553                      EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4554
4555let Predicates = [HasVLX] in {
4556  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4557                       (ins i256mem:$src),
4558                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4559                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4560                       EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4561
4562  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4563                      (ins i128mem:$src),
4564                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4565                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4566                      EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4567}
4568
4569multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4570                        X86SchedWriteMoveLS Sched,
4571                        PatFrag st_frag = alignednontemporalstore> {
4572  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4573  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4574                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4575                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4576                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4577}
4578
4579multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4580                           AVX512VLVectorVTInfo VTInfo,
4581                           X86SchedWriteMoveLSWidths Sched> {
4582  let Predicates = [HasAVX512] in
4583    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4584
4585  let Predicates = [HasAVX512, HasVLX] in {
4586    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4587    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4588  }
4589}
4590
4591defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4592                                SchedWriteVecMoveLSNT>, TB, PD;
4593defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4594                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
4595defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4596                                SchedWriteFMoveLSNT>, TB;
4597
4598let Predicates = [HasAVX512], AddedComplexity = 400 in {
4599  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4600            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4601  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4602            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4603  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4604            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4605
4606  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4607            (VMOVNTDQAZrm addr:$src)>;
4608  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4609            (VMOVNTDQAZrm addr:$src)>;
4610  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4611            (VMOVNTDQAZrm addr:$src)>;
4612  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4613            (VMOVNTDQAZrm addr:$src)>;
4614  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4615            (VMOVNTDQAZrm addr:$src)>;
4616  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4617            (VMOVNTDQAZrm addr:$src)>;
4618}
4619
4620let Predicates = [HasVLX], AddedComplexity = 400 in {
4621  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4622            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4623  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4624            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4625  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4626            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4627
4628  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4629            (VMOVNTDQAZ256rm addr:$src)>;
4630  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4631            (VMOVNTDQAZ256rm addr:$src)>;
4632  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4633            (VMOVNTDQAZ256rm addr:$src)>;
4634  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4635            (VMOVNTDQAZ256rm addr:$src)>;
4636  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4637            (VMOVNTDQAZ256rm addr:$src)>;
4638  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4639            (VMOVNTDQAZ256rm addr:$src)>;
4640
4641  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4642            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4643  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4644            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4645  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4646            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4647
4648  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4649            (VMOVNTDQAZ128rm addr:$src)>;
4650  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4651            (VMOVNTDQAZ128rm addr:$src)>;
4652  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4653            (VMOVNTDQAZ128rm addr:$src)>;
4654  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4655            (VMOVNTDQAZ128rm addr:$src)>;
4656  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4657            (VMOVNTDQAZ128rm addr:$src)>;
4658  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4659            (VMOVNTDQAZ128rm addr:$src)>;
4660}
4661
4662//===----------------------------------------------------------------------===//
4663// AVX-512 - Integer arithmetic
4664//===----------------------------------------------------------------------===//
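// avx512_binop_rm defines the masked reg/reg and reg/mem forms of an integer
// binary op, avx512_binop_rmb adds the embedded-broadcast (EVEX.b) memory
// form, and the *_vl wrappers instantiate the 512-bit version plus VLX-gated
// 256-bit and 128-bit versions.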
4665multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4666                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4667                           bit IsCommutable = 0> {
4668  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4669                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4670                    "$src2, $src1", "$src1, $src2",
4671                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4672                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
4673                    Sched<[sched]>;
4674
4675  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4676                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4677                  "$src2, $src1", "$src1, $src2",
4678                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4679                  AVX512BIBase, EVEX, VVVV,
4680                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4681}
4682
4683multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4684                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4685                            bit IsCommutable = 0> :
4686           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4687  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4688                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4689                  "${src2}"#_.BroadcastStr#", $src1",
4690                  "$src1, ${src2}"#_.BroadcastStr,
4691                  (_.VT (OpNode _.RC:$src1,
4692                                (_.BroadcastLdFrag addr:$src2)))>,
4693                  AVX512BIBase, EVEX, VVVV, EVEX_B,
4694                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4695}
4696
4697multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4698                              AVX512VLVectorVTInfo VTInfo,
4699                              X86SchedWriteWidths sched, Predicate prd,
4700                              bit IsCommutable = 0> {
4701  let Predicates = [prd] in
4702    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4703                             IsCommutable>, EVEX_V512;
4704
4705  let Predicates = [prd, HasVLX] in {
4706    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4707                                sched.YMM, IsCommutable>, EVEX_V256;
4708    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4709                                sched.XMM, IsCommutable>, EVEX_V128;
4710  }
4711}
4712
4713multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4714                               AVX512VLVectorVTInfo VTInfo,
4715                               X86SchedWriteWidths sched, Predicate prd,
4716                               bit IsCommutable = 0> {
4717  let Predicates = [prd] in
4718    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4719                             IsCommutable>, EVEX_V512;
4720
4721  let Predicates = [prd, HasVLX] in {
4722    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4723                                 sched.YMM, IsCommutable>, EVEX_V256;
4724    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4725                                 sched.XMM, IsCommutable>, EVEX_V128;
4726  }
4727}
4728
4729multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4730                                X86SchedWriteWidths sched, Predicate prd,
4731                                bit IsCommutable = 0> {
4732  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4733                                  sched, prd, IsCommutable>,
4734                                  REX_W, EVEX_CD8<64, CD8VF>;
4735}
4736
4737multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4738                                X86SchedWriteWidths sched, Predicate prd,
4739                                bit IsCommutable = 0> {
4740  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4741                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4742}
4743
4744multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4745                                X86SchedWriteWidths sched, Predicate prd,
4746                                bit IsCommutable = 0> {
4747  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4748                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4749                                 WIG;
4750}
4751
4752multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4753                                X86SchedWriteWidths sched, Predicate prd,
4754                                bit IsCommutable = 0> {
4755  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4756                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4757                                 WIG;
4758}
4759
4760multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4761                                 SDNode OpNode, X86SchedWriteWidths sched,
4762                                 Predicate prd, bit IsCommutable = 0> {
4763  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4764                                   IsCommutable>;
4765
4766  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4767                                   IsCommutable>;
4768}
4769
4770multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4771                                 SDNode OpNode, X86SchedWriteWidths sched,
4772                                 Predicate prd, bit IsCommutable = 0> {
4773  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4774                                   IsCommutable>;
4775
4776  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4777                                   IsCommutable>;
4778}
4779
4780multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4781                                  bits<8> opc_d, bits<8> opc_q,
4782                                  string OpcodeStr, SDNode OpNode,
4783                                  X86SchedWriteWidths sched,
4784                                  bit IsCommutable = 0> {
4785  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4786                                    sched, HasAVX512, IsCommutable>,
4787              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4788                                    sched, HasBWI, IsCommutable>;
4789}
4790
4791multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4792                            X86FoldableSchedWrite sched,
4793                            SDNode OpNode,X86VectorVTInfo _Src,
4794                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4795                            bit IsCommutable = 0> {
4796  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4797                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4798                            "$src2, $src1","$src1, $src2",
4799                            (_Dst.VT (OpNode
4800                                         (_Src.VT _Src.RC:$src1),
4801                                         (_Src.VT _Src.RC:$src2))),
4802                            IsCommutable>,
4803                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
4804  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4805                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4806                        "$src2, $src1", "$src1, $src2",
4807                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4808                                      (_Src.LdFrag addr:$src2)))>,
4809                        AVX512BIBase, EVEX, VVVV,
4810                        Sched<[sched.Folded, sched.ReadAfterFold]>;
4811
4812  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4813                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4814                    OpcodeStr,
4815                    "${src2}"#_Brdct.BroadcastStr#", $src1",
4816                     "$src1, ${src2}"#_Brdct.BroadcastStr,
4817                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4818                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4819                    AVX512BIBase, EVEX, VVVV, EVEX_B,
4820                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4821}
4822
4823defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4824                                    SchedWriteVecALU, 1>;
4825defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4826                                    SchedWriteVecALU, 0>;
4827defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4828                                    SchedWriteVecALU, HasBWI, 1>;
4829defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4830                                    SchedWriteVecALU, HasBWI, 0>;
4831defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4832                                     SchedWriteVecALU, HasBWI, 1>;
4833defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4834                                     SchedWriteVecALU, HasBWI, 0>;
4835defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4836                                    SchedWritePMULLD, HasAVX512, 1>, T8;
4837defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4838                                    SchedWriteVecIMul, HasBWI, 1>;
4839defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4840                                    SchedWriteVecIMul, HasDQI, 1>, T8;
4841defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4842                                    HasBWI, 1>;
4843defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4844                                     HasBWI, 1>;
4845defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4846                                      SchedWriteVecIMul, HasBWI, 1>, T8;
4847defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
4848                                   SchedWriteVecALU, HasBWI, 1>;
4849defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4850                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
4851defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4852                                     SchedWriteVecIMul, HasAVX512, 1>;
4853
4854multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4855                            X86SchedWriteWidths sched,
4856                            AVX512VLVectorVTInfo _SrcVTInfo,
4857                            AVX512VLVectorVTInfo _DstVTInfo,
4858                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4859  let Predicates = [prd] in
4860    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4861                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4862                                 v8i64_info, IsCommutable>,
4863                                  EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
4864  let Predicates = [HasVLX, prd] in {
4865    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4866                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4867                                      v4i64x_info, IsCommutable>,
4868                                      EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
4869    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4870                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4871                                      v2i64x_info, IsCommutable>,
4872                                     EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
4873  }
4874}
4875
4876defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4877                                avx512vl_i8_info, avx512vl_i8_info,
4878                                X86multishift, HasVBMI, 0>, T8;
4879
4880multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4881                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4882                            X86FoldableSchedWrite sched> {
4883  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4884                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4885                    OpcodeStr,
4886                    "${src2}"#_Src.BroadcastStr#", $src1",
4887                     "$src1, ${src2}"#_Src.BroadcastStr,
4888                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4889                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4890                    EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4891                    Sched<[sched.Folded, sched.ReadAfterFold]>;
4892}
4893
4894multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4895                            SDNode OpNode,X86VectorVTInfo _Src,
4896                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4897                            bit IsCommutable = 0> {
4898  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4899                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4900                            "$src2, $src1","$src1, $src2",
4901                            (_Dst.VT (OpNode
4902                                         (_Src.VT _Src.RC:$src1),
4903                                         (_Src.VT _Src.RC:$src2))),
4904                            IsCommutable, IsCommutable>,
4905                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
4906  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4907                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4908                        "$src2, $src1", "$src1, $src2",
4909                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4910                                      (_Src.LdFrag addr:$src2)))>,
4911                         EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
4912                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4913}
4914
4915multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4916                                    SDNode OpNode> {
4917  let Predicates = [HasBWI] in
4918  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4919                                 v32i16_info, SchedWriteShuffle.ZMM>,
4920                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4921                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4922  let Predicates = [HasBWI, HasVLX] in {
4923    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4924                                     v16i16x_info, SchedWriteShuffle.YMM>,
4925                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4926                                      v16i16x_info, SchedWriteShuffle.YMM>,
4927                                      EVEX_V256;
4928    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4929                                     v8i16x_info, SchedWriteShuffle.XMM>,
4930                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4931                                      v8i16x_info, SchedWriteShuffle.XMM>,
4932                                      EVEX_V128;
4933  }
4934}
4935multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4936                            SDNode OpNode> {
4937  let Predicates = [HasBWI] in
4938  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4939                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
4940  let Predicates = [HasBWI, HasVLX] in {
4941    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4942                                     v32i8x_info, SchedWriteShuffle.YMM>,
4943                                     EVEX_V256, WIG;
4944    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4945                                     v16i8x_info, SchedWriteShuffle.XMM>,
4946                                     EVEX_V128, WIG;
4947  }
4948}
4949
4950multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4951                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
4952                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4953  let Predicates = [HasBWI] in
4954  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4955                                _Dst.info512, SchedWriteVecIMul.ZMM,
4956                                IsCommutable>, EVEX_V512;
4957  let Predicates = [HasBWI, HasVLX] in {
4958    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4959                                     _Dst.info256, SchedWriteVecIMul.YMM,
4960                                     IsCommutable>, EVEX_V256;
4961    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4962                                     _Dst.info128, SchedWriteVecIMul.XMM,
4963                                     IsCommutable>, EVEX_V128;
4964  }
4965}
4966
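// Pack instructions: vpackssdw/vpackusdw narrow dwords to words and
// vpacksswb/vpackuswb narrow words to bytes, with signed ("ss") or unsigned
// ("us") saturation.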
4967defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4968defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4969defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4970defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4971
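// vpmaddubsw multiplies unsigned bytes by signed bytes and adds adjacent pairs
// into signed words (with saturation); vpmaddwd multiplies signed words and
// adds adjacent pairs into dwords, which is why only the latter is commutable.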
4972defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4973                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
4974defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4975                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
4976
4977defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4978                                    SchedWriteVecALU, HasBWI, 1>, T8;
4979defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4980                                    SchedWriteVecALU, HasBWI, 1>;
4981defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4982                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4983defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4984                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4985
4986defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4987                                    SchedWriteVecALU, HasBWI, 1>;
4988defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4989                                    SchedWriteVecALU, HasBWI, 1>, T8;
4990defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4991                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4992defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4993                                    SchedWriteVecALU, HasAVX512, 1>, T8;
4994
4995defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4996                                    SchedWriteVecALU, HasBWI, 1>, T8;
4997defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4998                                    SchedWriteVecALU, HasBWI, 1>;
4999defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5000                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5001defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5002                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5003
5004defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5005                                    SchedWriteVecALU, HasBWI, 1>;
5006defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5007                                    SchedWriteVecALU, HasBWI, 1>, T8;
5008defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5009                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5010defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5011                                    SchedWriteVecALU, HasAVX512, 1>, T8;
5012
5013// PMULLQ: Use the 512-bit version to implement 128/256-bit ops under NoVLX, HasEVEX512.
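// e.g. a v4i64 multiply is inserted into an undef v8i64 with INSERT_SUBREG,
// executed as VPMULLQZrr/VPMULLQZrmb, and the low ymm (or xmm) extracted back.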
5014let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
5015  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5016            (EXTRACT_SUBREG
5017                (VPMULLQZrr
5018                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5019                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5020             sub_ymm)>;
5021  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5022            (EXTRACT_SUBREG
5023                (VPMULLQZrmb
5024                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5025                    addr:$src2),
5026             sub_ymm)>;
5027
5028  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5029            (EXTRACT_SUBREG
5030                (VPMULLQZrr
5031                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5032                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5033             sub_xmm)>;
5034  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5035            (EXTRACT_SUBREG
5036                (VPMULLQZrmb
5037                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5038                    addr:$src2),
5039             sub_xmm)>;
5040}
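// Sketch of how the patterns above lower, assuming no VLX: the narrow
// operands are placed into the low lanes of undef 512-bit registers via
// INSERT_SUBREG, the full-width VPMULLQZrr/VPMULLQZrmb executes, and the low
// 128/256 bits are recovered with EXTRACT_SUBREG. The subregister nodes are
// register-class bookkeeping that normally disappears after register
// allocation, and the garbage produced in the upper lanes is never observed.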
5041
5042multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
5043  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5044            (EXTRACT_SUBREG
5045                (!cast<Instruction>(Instr#"rr")
5046                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5047                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5048             sub_ymm)>;
5049  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5050            (EXTRACT_SUBREG
5051                (!cast<Instruction>(Instr#"rmb")
5052                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5053                    addr:$src2),
5054             sub_ymm)>;
5055
5056  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5057            (EXTRACT_SUBREG
5058                (!cast<Instruction>(Instr#"rr")
5059                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5060                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5061             sub_xmm)>;
5062  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5063            (EXTRACT_SUBREG
5064                (!cast<Instruction>(Instr#"rmb")
5065                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5066                    addr:$src2),
5067             sub_xmm)>;
5068}
5069
5070let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
5071  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5072  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5073  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5074  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5075}
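// The !cast<Instruction>(Instr#"rr") / !cast<Instruction>(Instr#"rmb")
// lookups above resolve the concatenated string to a concrete instruction at
// TableGen time, e.g. "VPMAXUQZ"#"rr" becomes VPMAXUQZrr, so one multiclass
// covers all four 64-bit min/max operations with the same widen-to-512-bit
// idiom as the PMULLQ patterns above.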
5076
5077//===----------------------------------------------------------------------===//
5078// AVX-512  Logical Instructions
5079//===----------------------------------------------------------------------===//
5080
5081defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5082                                   SchedWriteVecLogic, HasAVX512, 1>;
5083defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5084                                  SchedWriteVecLogic, HasAVX512, 1>;
5085defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5086                                   SchedWriteVecLogic, HasAVX512, 1>;
5087defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5088                                    SchedWriteVecLogic, HasAVX512>;
5089
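// AVX-512 only provides dword/qword forms of the bitwise logic instructions.
// Since these operations are element-size agnostic, the byte and word vector
// types below simply reuse the qword VPAND*/VPOR*/VPXOR*/VPANDN* patterns.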
5090let Predicates = [HasVLX] in {
5091  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5092            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5093  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5094            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5095
5096  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5097            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5098  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5099            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5100
5101  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5102            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5103  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5104            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5105
5106  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5107            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5108  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5109            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5110
5111  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5112            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5113  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5114            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5115
5116  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5117            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5118  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5119            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5120
5121  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5122            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5123  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5124            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5125
5126  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5127            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5128  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5129            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5130
5131  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5132            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5133  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5134            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5135
5136  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5137            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5138  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5139            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5140
5141  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5142            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5143  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5144            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5145
5146  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5147            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5148  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5149            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5150
5151  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5152            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5153  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5154            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5155
5156  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5157            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5158  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5159            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5160
5161  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5162            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5163  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5164            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5165
5166  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5167            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5168  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5169            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5170}
5171
5172let Predicates = [HasAVX512] in {
5173  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5174            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5175  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5176            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5177
5178  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5179            (VPORQZrr VR512:$src1, VR512:$src2)>;
5180  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5181            (VPORQZrr VR512:$src1, VR512:$src2)>;
5182
5183  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5184            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5185  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5186            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5187
5188  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5189            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5190  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5191            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5192
5193  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5194            (VPANDQZrm VR512:$src1, addr:$src2)>;
5195  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5196            (VPANDQZrm VR512:$src1, addr:$src2)>;
5197
5198  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5199            (VPORQZrm VR512:$src1, addr:$src2)>;
5200  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5201            (VPORQZrm VR512:$src1, addr:$src2)>;
5202
5203  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5204            (VPXORQZrm VR512:$src1, addr:$src2)>;
5205  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5206            (VPXORQZrm VR512:$src1, addr:$src2)>;
5207
5208  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5209            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5210  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5211            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5212}
5213
5214// Patterns to catch a vselect whose element type differs from that of the logic op.
5215multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5216                                    X86VectorVTInfo _,
5217                                    X86VectorVTInfo IntInfo> {
5218  // Masked register-register logical operations.
5219  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5220                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5221                   _.RC:$src0)),
5222            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5223             _.RC:$src1, _.RC:$src2)>;
5224
5225  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5226                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5227                   _.ImmAllZerosV)),
5228            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5229             _.RC:$src2)>;
5230
5231  // Masked register-memory logical operations.
5232  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5233                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5234                                            (load addr:$src2)))),
5235                   _.RC:$src0)),
5236            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5237             _.RC:$src1, addr:$src2)>;
5238  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5239                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5240                                            (load addr:$src2)))),
5241                   _.ImmAllZerosV)),
5242            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5243             addr:$src2)>;
5244}
5245
5246multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5247                                         X86VectorVTInfo _,
5248                                         X86VectorVTInfo IntInfo> {
5249  // Register-broadcast logical operations.
5250  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5251                   (bitconvert
5252                    (IntInfo.VT (OpNode _.RC:$src1,
5253                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5254                   _.RC:$src0)),
5255            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5256             _.RC:$src1, addr:$src2)>;
5257  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5258                   (bitconvert
5259                    (IntInfo.VT (OpNode _.RC:$src1,
5260                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5261                   _.ImmAllZerosV)),
5262            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5263             _.RC:$src1, addr:$src2)>;
5264}
5265
5266multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5267                                         AVX512VLVectorVTInfo SelectInfo,
5268                                         AVX512VLVectorVTInfo IntInfo> {
5269let Predicates = [HasVLX] in {
5270  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5271                                 IntInfo.info128>;
5272  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5273                                 IntInfo.info256>;
5274}
5275let Predicates = [HasAVX512] in {
5276  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5277                                 IntInfo.info512>;
5278}
5279}
5280
5281multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5282                                               AVX512VLVectorVTInfo SelectInfo,
5283                                               AVX512VLVectorVTInfo IntInfo> {
5284let Predicates = [HasVLX] in {
5285  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5286                                       SelectInfo.info128, IntInfo.info128>;
5287  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5288                                       SelectInfo.info256, IntInfo.info256>;
5289}
5290let Predicates = [HasAVX512] in {
5291  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5292                                       SelectInfo.info512, IntInfo.info512>;
5293}
5294}
5295
5296multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5297  // i64 vselect with i32/i16/i8 logic op
5298  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5299                                       avx512vl_i32_info>;
5300  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5301                                       avx512vl_i16_info>;
5302  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5303                                       avx512vl_i8_info>;
5304
5305  // i32 vselect with i64/i16/i8 logic op
5306  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5307                                       avx512vl_i64_info>;
5308  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5309                                       avx512vl_i16_info>;
5310  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5311                                       avx512vl_i8_info>;
5312
5313  // f32 vselect with i64/i32/i16/i8 logic op
5314  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5315                                       avx512vl_i64_info>;
5316  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5317                                       avx512vl_i32_info>;
5318  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5319                                       avx512vl_i16_info>;
5320  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5321                                       avx512vl_i8_info>;
5322
5323  // f64 vselect with i64/i32/i16/i8 logic op
5324  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5325                                       avx512vl_i64_info>;
5326  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5327                                       avx512vl_i32_info>;
5328  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5329                                       avx512vl_i16_info>;
5330  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5331                                       avx512vl_i8_info>;
5332
5333  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5334                                             avx512vl_f32_info,
5335                                             avx512vl_i32_info>;
5336  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5337                                             avx512vl_f64_info,
5338                                             avx512vl_i64_info>;
5339}
5340
5341defm : avx512_logical_lowering_types<"VPAND", and>;
5342defm : avx512_logical_lowering_types<"VPOR",  or>;
5343defm : avx512_logical_lowering_types<"VPXOR", xor>;
5344defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
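// Illustrative expansion of the multiclasses above: the "VPAND" instantiation
// that pairs a v8i64 select with a v16i32 logic op produces a pattern roughly
// like (a sketch of the generated record, not literal source):
//   def : Pat<(v8i64 (vselect_mask VK8WM:$mask,
//                     (bitconvert (v16i32 (and VR512:$src1, VR512:$src2))),
//                     VR512:$src0)),
//             (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
// so a masked logic op is still selected even when the mask's element type
// does not match the type the logic op was performed in.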
5345
5346//===----------------------------------------------------------------------===//
5347// AVX-512  FP arithmetic
5348//===----------------------------------------------------------------------===//
5349
5350multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5351                            SDPatternOperator OpNode, SDNode VecNode,
5352                            X86FoldableSchedWrite sched, bit IsCommutable> {
5353  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5354  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5355                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5356                           "$src2, $src1", "$src1, $src2",
5357                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5358                           Sched<[sched]>;
5359
5360  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5361                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5362                         "$src2, $src1", "$src1, $src2",
5363                         (_.VT (VecNode _.RC:$src1,
5364                                        (_.ScalarIntMemFrags addr:$src2)))>,
5365                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5366  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5367  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5368                         (ins _.FRC:$src1, _.FRC:$src2),
5369                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5370                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5371                          Sched<[sched]> {
5372    let isCommutable = IsCommutable;
5373  }
5374  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5375                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5376                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5377                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5378                         (_.ScalarLdFrag addr:$src2)))]>,
5379                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5380  }
5381  }
5382}
5383
5384multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5385                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5386  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5387  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5388                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5389                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5390                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5391                          (i32 timm:$rc))>,
5392                          EVEX_B, EVEX_RC, Sched<[sched]>;
5393}
5394multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5395                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5396                                X86FoldableSchedWrite sched, bit IsCommutable> {
5397  let ExeDomain = _.ExeDomain in {
5398  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5399                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5400                           "$src2, $src1", "$src1, $src2",
5401                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5402                           Sched<[sched]>, SIMD_EXC;
5403
5404  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5405                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5406                         "$src2, $src1", "$src1, $src2",
5407                         (_.VT (VecNode _.RC:$src1,
5408                                        (_.ScalarIntMemFrags addr:$src2)))>,
5409                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5410
5411  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5412      Uses = [MXCSR], mayRaiseFPException = 1 in {
5413  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5414                         (ins _.FRC:$src1, _.FRC:$src2),
5415                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5416                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5417                          Sched<[sched]> {
5418    let isCommutable = IsCommutable;
5419  }
5420  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5421                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5422                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5423                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5424                         (_.ScalarLdFrag addr:$src2)))]>,
5425                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5426  }
5427
5428  let Uses = [MXCSR] in
5429  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5430                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5431                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5432                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5433                            EVEX_B, Sched<[sched]>;
5434  }
5435}
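// Note on the rrb forms above: EVEX.b in a register-register encoding carries
// either a static rounding mode (AVX512RC:$rc, written {rn-sae}, {rd-sae},
// {ru-sae} or {rz-sae} in assembly) or suppress-all-exceptions ({sae}),
// without any extra instruction bytes.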
5436
5437multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5438                                SDNode VecNode, SDNode RndNode,
5439                                X86SchedWriteSizes sched, bit IsCommutable> {
5440  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5441                              sched.PS.Scl, IsCommutable>,
5442             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5443                              sched.PS.Scl>,
5444                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5445  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5446                              sched.PD.Scl, IsCommutable>,
5447             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5448                              sched.PD.Scl>,
5449                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5450  let Predicates = [HasFP16] in
5451    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5452                                VecNode, sched.PH.Scl, IsCommutable>,
5453               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5454                                sched.PH.Scl>,
5455                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5456}
5457
5458multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5459                              SDNode VecNode, SDNode SaeNode,
5460                              X86SchedWriteSizes sched, bit IsCommutable> {
5461  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5462                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5463                              TB, XS, EVEX, VVVV, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5464  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5465                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5466                              TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5467  let Predicates = [HasFP16] in {
5468    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5469                                VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
5470                                T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5471  }
5472}
5473defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5474                                 SchedWriteFAddSizes, 1>;
5475defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5476                                 SchedWriteFMulSizes, 1>;
5477defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5478                                 SchedWriteFAddSizes, 0>;
5479defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5480                                 SchedWriteFDivSizes, 0>;
5481defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5482                               SchedWriteFCmpSizes, 0>;
5483defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5484                               SchedWriteFCmpSizes, 0>;
5485
5486// MIN/MAX nodes are commutable under "unsafe-fp-math". In that case we use the
5487// commutative X86fminc and X86fmaxc nodes instead of X86fmin and X86fmax.
5488multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5489                                    X86VectorVTInfo _, SDNode OpNode,
5490                                    X86FoldableSchedWrite sched> {
5491  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5492  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5493                         (ins _.FRC:$src1, _.FRC:$src2),
5494                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5495                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5496                          Sched<[sched]> {
5497    let isCommutable = 1;
5498  }
5499  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5500                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5501                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5502                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5503                         (_.ScalarLdFrag addr:$src2)))]>,
5504                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5505  }
5506}
5507defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5508                                         SchedWriteFCmp.Scl>, TB, XS,
5509                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5510
5511defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5512                                         SchedWriteFCmp.Scl>, TB, XD,
5513                                         REX_W, EVEX, VVVV, VEX_LIG,
5514                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5515
5516defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5517                                         SchedWriteFCmp.Scl>, TB, XS,
5518                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5519
5520defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5521                                         SchedWriteFCmp.Scl>, TB, XD,
5522                                         REX_W, EVEX, VVVV, VEX_LIG,
5523                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5524
5525defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5526                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5527                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5528
5529defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5530                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
5531                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;
5532
5533multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5534                            SDPatternOperator MaskOpNode,
5535                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5536                            bit IsCommutable,
5537                            bit IsKCommutable = IsCommutable,
5538                            string suffix = _.Suffix,
5539                            string ClobberConstraint = "",
5540                            bit MayRaiseFPException = 1> {
5541  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5542      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5543  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5544                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5545                                 "$src2, $src1", "$src1, $src2",
5546                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5547                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5548                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
5549  let mayLoad = 1 in {
5550    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5551                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5552                                   "$src2, $src1", "$src1, $src2",
5553                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5554                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5555                                   ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5556    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5557                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5558                                    "${src2}"#_.BroadcastStr#", $src1",
5559                                    "$src1, ${src2}"#_.BroadcastStr,
5560                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5561                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5562                                    ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5563    }
5564  }
5565}
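// In the rmb forms above, EVEX.b with a memory operand selects an embedded
// broadcast: a single scalar is loaded and splatted across the vector, e.g.
// "vaddps (%rax){1to16}, %zmm1, %zmm2" for the 512-bit single-precision case.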
5566
5567multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5568                                  SDPatternOperator OpNodeRnd,
5569                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5570                                  string suffix = _.Suffix,
5571                                  string ClobberConstraint = ""> {
5572  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5573  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5574                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5575                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5576                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5577                  0, 0, 0, vselect_mask, ClobberConstraint>,
5578                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
5579}
5580
5581multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5582                                SDPatternOperator OpNodeSAE,
5583                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5584  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5585  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5586                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5587                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5588                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5589                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
5590}
5591
5592multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5593                             SDPatternOperator MaskOpNode,
5594                             Predicate prd, X86SchedWriteSizes sched,
5595                             bit IsCommutable = 0,
5596                             bit IsPD128Commutable = IsCommutable> {
5597  let Predicates = [prd] in {
5598  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5599                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
5600                              EVEX_CD8<32, CD8VF>;
5601  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5602                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
5603                              EVEX_CD8<64, CD8VF>;
5604  }
5605
5606  // Define only if the AVX512VL feature is present.
5607  let Predicates = [prd, HasVLX] in {
5608    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5609                                   sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
5610                                   EVEX_CD8<32, CD8VF>;
5611    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5612                                   sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
5613                                   EVEX_CD8<32, CD8VF>;
5614    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5615                                   sched.PD.XMM, IsPD128Commutable,
5616                                   IsCommutable>, EVEX_V128, TB, PD, REX_W,
5617                                   EVEX_CD8<64, CD8VF>;
5618    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5619                                   sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
5620                                   EVEX_CD8<64, CD8VF>;
5621  }
5622}
5623
5624multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5625                              SDPatternOperator MaskOpNode,
5626                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5627  let Predicates = [HasFP16] in {
5628    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5629                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
5630                                EVEX_CD8<16, CD8VF>;
5631  }
5632  let Predicates = [HasVLX, HasFP16] in {
5633    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5634                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
5635                                   EVEX_CD8<16, CD8VF>;
5636    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5637                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
5638                                   EVEX_CD8<16, CD8VF>;
5639  }
5640}
5641
5642let Uses = [MXCSR] in
5643multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5644                                   X86SchedWriteSizes sched> {
5645  let Predicates = [HasFP16] in {
5646    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5647                                      v32f16_info>,
5648                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5649  }
5650  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5651                                    v16f32_info>,
5652                                    EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5653  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5654                                    v8f64_info>,
5655                                    EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
5656}
5657
5658let Uses = [MXCSR] in
5659multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5660                                 X86SchedWriteSizes sched> {
5661  let Predicates = [HasFP16] in {
5662    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5663                                    v32f16_info>,
5664                                    EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
5665  }
5666  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5667                                  v16f32_info>,
5668                                  EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
5669  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5670                                  v8f64_info>,
5671                                  EVEX_V512, TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
5672}
5673
5674defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5675                              SchedWriteFAddSizes, 1>,
5676            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5677            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5678defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5679                              SchedWriteFMulSizes, 1>,
5680            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5681            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5682defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5683                              SchedWriteFAddSizes>,
5684            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5685            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5686defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5687                              SchedWriteFDivSizes>,
5688            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5689            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5690defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5691                              SchedWriteFCmpSizes, 0>,
5692            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5693            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5694defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5695                              SchedWriteFCmpSizes, 0>,
5696            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5697            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5698let isCodeGenOnly = 1 in {
5699  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5700                                 SchedWriteFCmpSizes, 1>,
5701               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5702                                 SchedWriteFCmpSizes, 1>;
5703  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5704                                 SchedWriteFCmpSizes, 1>,
5705               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5706                                 SchedWriteFCmpSizes, 1>;
5707}
5708let Uses = []<Register>, mayRaiseFPException = 0 in {
5709defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5710                               SchedWriteFLogicSizes, 1>;
5711defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5712                               SchedWriteFLogicSizes, 0>;
5713defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5714                               SchedWriteFLogicSizes, 1>;
5715defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5716                               SchedWriteFLogicSizes, 1>;
5717}
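// null_frag attaches no selection patterns to the defs above; they provide
// the encodings and assembler syntax for VANDP*/VANDNP*/VORP*/VXORP*, while
// the ISel patterns for FP logic operations are defined elsewhere.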
5718
5719multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5720                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5721  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5722  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5723                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5724                  "$src2, $src1", "$src1, $src2",
5725                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5726                  EVEX, VVVV, Sched<[sched]>;
5727  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5728                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5729                  "$src2, $src1", "$src1, $src2",
5730                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5731                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5732  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5733                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5734                   "${src2}"#_.BroadcastStr#", $src1",
5735                   "$src1, ${src2}"#_.BroadcastStr,
5736                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5737                   EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5738  }
5739}
5740
5741multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5742                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5743  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5744  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5745                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5746                  "$src2, $src1", "$src1, $src2",
5747                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5748                  Sched<[sched]>;
5749  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5750                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5751                  "$src2, $src1", "$src1, $src2",
5752                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5753                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5754  }
5755}
5756
5757multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5758                                X86SchedWriteWidths sched> {
5759  let Predicates = [HasFP16] in {
5760    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5761               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5762                                EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
5763    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5764               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5765                             EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
5766  }
5767  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5768             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5769                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
5770  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5771             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5772                              EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5773  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5774             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5775                                    X86scalefsRnd, sched.Scl>,
5776                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
5777  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5778             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5779                                    X86scalefsRnd, sched.Scl>,
5780                                    EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;
5781
5782  // Define only if the AVX512VL feature is present.
5783  let Predicates = [HasVLX] in {
5784    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5785                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
5786    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5787                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
5788    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5789                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5790    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5791                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
5792  }
5793
5794  let Predicates = [HasFP16, HasVLX] in {
5795    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
5796                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5797    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
5798                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
5799  }
5800}
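// Semantics reminder: vscalef computes src1 * 2^floor(src2) per element
// (src2 need not be an integer); the scalar forms operate on the low element
// only.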
5801defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;
5802
5803//===----------------------------------------------------------------------===//
5804// AVX-512  VPTESTM instructions
5805//===----------------------------------------------------------------------===//
5806
5807multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5808                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5809  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5810  // There are just too many permutations due to commutability and bitcasts.
5811  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5812  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5813                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5814                      "$src2, $src1", "$src1, $src2",
5815                   (null_frag), (null_frag), 1>,
5816                   EVEX, VVVV, Sched<[sched]>;
5817  let mayLoad = 1 in
5818  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5819                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5820                       "$src2, $src1", "$src1, $src2",
5821                   (null_frag), (null_frag)>,
5822                   EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5823                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5824  }
5825}
5826
5827multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5828                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5829  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5830  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5831                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5832                    "${src2}"#_.BroadcastStr#", $src1",
5833                    "$src1, ${src2}"#_.BroadcastStr,
5834                    (null_frag), (null_frag)>,
5835                    EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
5836                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5837}
5838
5839multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5840                                  X86SchedWriteWidths sched,
5841                                  AVX512VLVectorVTInfo _> {
5842  let Predicates  = [HasAVX512] in
5843  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
5844           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5845
5846  let Predicates = [HasAVX512, HasVLX] in {
5847  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
5848              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5849  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
5850              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5851  }
5852}
5853
5854multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5855                            X86SchedWriteWidths sched> {
5856  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5857                                 avx512vl_i32_info>;
5858  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5859                                 avx512vl_i64_info>, REX_W;
5860}
5861
5862multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5863                            X86SchedWriteWidths sched> {
5864  let Predicates = [HasBWI] in {
5865  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5866                            v32i16_info>, EVEX_V512, REX_W;
5867  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5868                            v64i8_info>, EVEX_V512;
5869  }
5870
5871  let Predicates = [HasVLX, HasBWI] in {
5872  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5873                            v16i16x_info>, EVEX_V256, REX_W;
5874  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5875                            v8i16x_info>, EVEX_V128, REX_W;
5876  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5877                            v32i8x_info>, EVEX_V256;
5878  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5879                            v16i8x_info>, EVEX_V128;
5880  }
5881}
5882
5883multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5884                                   X86SchedWriteWidths sched> :
5885  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5886  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5887
5888defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5889                                         SchedWriteVecLogic>, T8, PD;
5890defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5891                                         SchedWriteVecLogic>, T8, XS;
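// Semantics reminder (the patterns are supplied by X86ISelDAGToDAG, per the
// NOTE above): vptestm sets k[i] = ((src1[i] & src2[i]) != 0) and vptestnm
// sets k[i] = ((src1[i] & src2[i]) == 0) for each element i.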
5892
5893//===----------------------------------------------------------------------===//
5894// AVX-512  Shift instructions
5895//===----------------------------------------------------------------------===//
5896
5897multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5898                            string OpcodeStr, SDNode OpNode,
5899                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5900  let ExeDomain = _.ExeDomain in {
5901  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5902                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5903                      "$src2, $src1", "$src1, $src2",
5904                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5905                   Sched<[sched]>;
5906  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5907                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5908                       "$src2, $src1", "$src1, $src2",
5909                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5910                          (i8 timm:$src2)))>,
5911                   Sched<[sched.Folded]>;
5912  }
5913}
5914
5915multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5916                             string OpcodeStr, SDNode OpNode,
5917                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5918  let ExeDomain = _.ExeDomain in
5919  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5920                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5921      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5922     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5923     EVEX_B, Sched<[sched.Folded]>;
5924}
5925
5926multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5927                            X86FoldableSchedWrite sched, ValueType SrcVT,
5928                            X86VectorVTInfo _> {
5929   // src2 is always 128-bit
5930  let ExeDomain = _.ExeDomain in {
5931  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5932                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5933                      "$src2, $src1", "$src1, $src2",
5934                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5935                   AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
5936  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5937                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5938                       "$src2, $src1", "$src1, $src2",
5939                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5940                   AVX512BIBase,
5941                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
5942  }
5943}
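// These are the uniform-count shifts (vpsllw/d/q, vpsrlw/d/q, vpsraw/d/q with
// an XMM count): every element is shifted by the same amount, taken from the
// low 64 bits of the 128-bit $src2 operand regardless of the vector width.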
5944
5945multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5946                              X86SchedWriteWidths sched, ValueType SrcVT,
5947                              AVX512VLVectorVTInfo VTInfo,
5948                              Predicate prd> {
5949  let Predicates = [prd] in
5950  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5951                               VTInfo.info512>, EVEX_V512,
5952                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5953  let Predicates = [prd, HasVLX] in {
5954  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5955                               VTInfo.info256>, EVEX_V256,
5956                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5957  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5958                               VTInfo.info128>, EVEX_V128,
5959                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5960  }
5961}
5962
5963multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5964                              string OpcodeStr, SDNode OpNode,
5965                              X86SchedWriteWidths sched> {
5966  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5967                              avx512vl_i32_info, HasAVX512>;
5968  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5969                              avx512vl_i64_info, HasAVX512>, REX_W;
5970  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5971                              avx512vl_i16_info, HasBWI>;
5972}
5973
5974multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5975                                  string OpcodeStr, SDNode OpNode,
5976                                  X86SchedWriteWidths sched,
5977                                  AVX512VLVectorVTInfo VTInfo> {
5978  let Predicates = [HasAVX512] in
5979  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5980                              sched.ZMM, VTInfo.info512>,
5981             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5982                               VTInfo.info512>, EVEX_V512;
5983  let Predicates = [HasAVX512, HasVLX] in {
5984  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5985                              sched.YMM, VTInfo.info256>,
5986             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5987                               VTInfo.info256>, EVEX_V256;
5988  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5989                              sched.XMM, VTInfo.info128>,
5990             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5991                               VTInfo.info128>, EVEX_V128;
5992  }
5993}
5994
5995multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5996                              string OpcodeStr, SDNode OpNode,
5997                              X86SchedWriteWidths sched> {
5998  let Predicates = [HasBWI] in
5999  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6000                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6001  let Predicates = [HasVLX, HasBWI] in {
6002  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6003                               sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6004  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6005                               sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6006  }
6007}
6008
6009multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6010                               Format ImmFormR, Format ImmFormM,
6011                               string OpcodeStr, SDNode OpNode,
6012                               X86SchedWriteWidths sched> {
6013  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6014                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6015  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6016                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6017}
6018
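// The immediate shift/rotate forms below are distinguished mainly by the
// ModRM.reg field rather than by the opcode: /2 (MRM2) = logical right shift,
// /4 (MRM4) = arithmetic right shift, /6 (MRM6) = left shift, and
// /0 (MRM0) / /1 (MRM1) = rotate right / rotate left.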
6019defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6020                                 SchedWriteVecShiftImm>,
6021             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6022                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6023
6024defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6025                                 SchedWriteVecShiftImm>,
6026             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6027                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6028
6029defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6030                                 SchedWriteVecShiftImm>,
6031             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6032                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6033
6034defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6035                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6036defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6037                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
6038
6039defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6040                                SchedWriteVecShift>;
6041defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6042                                SchedWriteVecShift>;
6043defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6044                                SchedWriteVecShift>;
6045
6046// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is not available.
6047let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6048  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6049            (EXTRACT_SUBREG (v8i64
6050              (VPSRAQZrr
6051                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6052                 VR128X:$src2)), sub_ymm)>;
6053
6054  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6055            (EXTRACT_SUBREG (v8i64
6056              (VPSRAQZrr
6057                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6058                 VR128X:$src2)), sub_xmm)>;
6059
6060  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6061            (EXTRACT_SUBREG (v8i64
6062              (VPSRAQZri
6063                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6064                 timm:$src2)), sub_ymm)>;
6065
6066  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6067            (EXTRACT_SUBREG (v8i64
6068              (VPSRAQZri
6069                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6070                 timm:$src2)), sub_xmm)>;
6071}
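// A minimal sketch of the widening idiom used above (illustrative only, not
// verbatim generated MIR): the narrow source is placed in the low subregister
// of an undefined zmm, the 512-bit shift is executed, and the low subregister
// is extracted again, e.g. for the v4i64 immediate case:
//   %z   = INSERT_SUBREG (IMPLICIT_DEF), %src1_ymm, sub_ymm
//   %z   = VPSRAQZri %z, imm
//   %dst = EXTRACT_SUBREG %z, sub_ymm
// The upper lanes of the zmm are undefined but never observed.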
6072
6073//===-------------------------------------------------------------------===//
6074// Variable Bit Shifts
6075//===-------------------------------------------------------------------===//
6076
6077multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6078                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6079  let ExeDomain = _.ExeDomain in {
6080  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6081                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6082                      "$src2, $src1", "$src1, $src2",
6083                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6084                   AVX5128IBase, EVEX, VVVV, Sched<[sched]>;
6085  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6086                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6087                       "$src2, $src1", "$src1, $src2",
6088                   (_.VT (OpNode _.RC:$src1,
6089                   (_.VT (_.LdFrag addr:$src2))))>,
6090                   AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6091                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6092  }
6093}
6094
6095multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6096                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6097  let ExeDomain = _.ExeDomain in
6098  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6099                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6100                    "${src2}"#_.BroadcastStr#", $src1",
6101                    "$src1, ${src2}"#_.BroadcastStr,
6102                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6103                    AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6104                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6105}
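// The "mb" form above folds an EVEX embedded-broadcast load: the second source
// is a single scalar element loaded from memory and broadcast to every lane
// before the operation.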
6106
6107multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6108                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6109  let Predicates  = [HasAVX512] in
6110  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6111           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6112
6113  let Predicates = [HasAVX512, HasVLX] in {
6114  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6115              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6116  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6117              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6118  }
6119}
6120
6121multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6122                                  SDNode OpNode, X86SchedWriteWidths sched> {
6123  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6124                                 avx512vl_i32_info>;
6125  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6126                                 avx512vl_i64_info>, REX_W;
6127}
6128
6129// Use the 512-bit version to implement the 128/256-bit variants when VLX is not available.
6130multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6131                                     SDNode OpNode, list<Predicate> p> {
6132  let Predicates = p in {
6133  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6134                                  (_.info256.VT _.info256.RC:$src2))),
6135            (EXTRACT_SUBREG
6136                (!cast<Instruction>(OpcodeStr#"Zrr")
6137                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6138                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6139             sub_ymm)>;
6140
6141  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6142                                  (_.info128.VT _.info128.RC:$src2))),
6143            (EXTRACT_SUBREG
6144                (!cast<Instruction>(OpcodeStr#"Zrr")
6145                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6146                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6147             sub_xmm)>;
6148  }
6149}
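// The target instruction is looked up by name (OpcodeStr#"Zrr"), so this one
// multiclass can be reused for the VPSRAVQ/VPSLLVW/VPSRAVW/VPSRLVW lowerings
// instantiated further below.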
6150multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6151                              SDNode OpNode, X86SchedWriteWidths sched> {
6152  let Predicates = [HasBWI] in
6153  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6154              EVEX_V512, REX_W;
6155  let Predicates = [HasVLX, HasBWI] in {
6156
6157  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6158              EVEX_V256, REX_W;
6159  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6160              EVEX_V128, REX_W;
6161  }
6162}
6163
6164defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6165              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6166
6167defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6168              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6169
6170defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6171              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6172
6173defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6174defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6175
6176defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>;
6177defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>;
6178defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>;
6179defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>;
6180
6181
6182// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 + v4i32/v8i32 when VLX is not available.
6183let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6184  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6185            (EXTRACT_SUBREG (v8i64
6186              (VPROLVQZrr
6187                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6188                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6189                       sub_xmm)>;
6190  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6191            (EXTRACT_SUBREG (v8i64
6192              (VPROLVQZrr
6193                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6194                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6195                       sub_ymm)>;
6196
6197  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6198            (EXTRACT_SUBREG (v16i32
6199              (VPROLVDZrr
6200                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6201                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6202                        sub_xmm)>;
6203  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6204            (EXTRACT_SUBREG (v16i32
6205              (VPROLVDZrr
6206                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6207                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6208                        sub_ymm)>;
6209
6210  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6211            (EXTRACT_SUBREG (v8i64
6212              (VPROLQZri
6213                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6214                        timm:$src2)), sub_xmm)>;
6215  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6216            (EXTRACT_SUBREG (v8i64
6217              (VPROLQZri
6218                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6219                       timm:$src2)), sub_ymm)>;
6220
6221  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6222            (EXTRACT_SUBREG (v16i32
6223              (VPROLDZri
6224                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6225                        timm:$src2)), sub_xmm)>;
6226  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6227            (EXTRACT_SUBREG (v16i32
6228              (VPROLDZri
6229                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6230                        timm:$src2)), sub_ymm)>;
6231}
6232
6233// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 + v4i32/v8i32 when VLX is not available.
6234let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
6235  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6236            (EXTRACT_SUBREG (v8i64
6237              (VPRORVQZrr
6238                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6239                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6240                       sub_xmm)>;
6241  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6242            (EXTRACT_SUBREG (v8i64
6243              (VPRORVQZrr
6244                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6245                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6246                       sub_ymm)>;
6247
6248  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6249            (EXTRACT_SUBREG (v16i32
6250              (VPRORVDZrr
6251                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6252                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6253                        sub_xmm)>;
6254  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6255            (EXTRACT_SUBREG (v16i32
6256              (VPRORVDZrr
6257                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6258                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6259                        sub_ymm)>;
6260
6261  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6262            (EXTRACT_SUBREG (v8i64
6263              (VPRORQZri
6264                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6265                        timm:$src2)), sub_xmm)>;
6266  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6267            (EXTRACT_SUBREG (v8i64
6268              (VPRORQZri
6269                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6270                       timm:$src2)), sub_ymm)>;
6271
6272  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6273            (EXTRACT_SUBREG (v16i32
6274              (VPRORDZri
6275                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6276                        timm:$src2)), sub_xmm)>;
6277  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6278            (EXTRACT_SUBREG (v16i32
6279              (VPRORDZri
6280                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6281                        timm:$src2)), sub_ymm)>;
6282}
6283
6284//===-------------------------------------------------------------------===//
6285// 1-src variable permutation VPERMW/D/Q
6286//===-------------------------------------------------------------------===//
6287
6288multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6289                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6290  let Predicates  = [HasAVX512] in
6291  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6292           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6293
6294  let Predicates = [HasAVX512, HasVLX] in
6295  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6296              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6297}
6298
6299multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6300                                 string OpcodeStr, SDNode OpNode,
6301                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6302  let Predicates = [HasAVX512] in
6303  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6304                              sched, VTInfo.info512>,
6305             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6306                               sched, VTInfo.info512>, EVEX_V512;
6307  let Predicates = [HasAVX512, HasVLX] in
6308  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6309                              sched, VTInfo.info256>,
6310             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6311                               sched, VTInfo.info256>, EVEX_V256;
6312}
6313
6314multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6315                              Predicate prd, SDNode OpNode,
6316                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6317  let Predicates = [prd] in
6318  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6319              EVEX_V512 ;
6320  let Predicates = [HasVLX, prd] in {
6321  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6322              EVEX_V256 ;
6323  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6324              EVEX_V128 ;
6325  }
6326}
6327
6328defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6329                               WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6330defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6331                               WriteVarShuffle256, avx512vl_i8_info>;
6332
6333defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6334                                    WriteVarShuffle256, avx512vl_i32_info>;
6335defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6336                                    WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6337defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6338                                     WriteFVarShuffle256, avx512vl_f32_info>;
6339defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6340                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
6341
6342defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6343                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6344                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6345defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6346                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6347                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6348
6349//===----------------------------------------------------------------------===//
6350// AVX-512 - VPERMIL
6351//===----------------------------------------------------------------------===//
6352
6353multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6354                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6355                             X86VectorVTInfo Ctrl> {
6356  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6357                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6358                  "$src2, $src1", "$src1, $src2",
6359                  (_.VT (OpNode _.RC:$src1,
6360                               (Ctrl.VT Ctrl.RC:$src2)))>,
6361                  T8, PD, EVEX, VVVV, Sched<[sched]>;
6362  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6363                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6364                  "$src2, $src1", "$src1, $src2",
6365                  (_.VT (OpNode
6366                           _.RC:$src1,
6367                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6368                  T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
6369                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6370  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6371                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6372                   "${src2}"#_.BroadcastStr#", $src1",
6373                   "$src1, ${src2}"#_.BroadcastStr,
6374                   (_.VT (OpNode
6375                            _.RC:$src1,
6376                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6377                   T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6378                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6379}
6380
6381multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6382                                    X86SchedWriteWidths sched,
6383                                    AVX512VLVectorVTInfo _,
6384                                    AVX512VLVectorVTInfo Ctrl> {
6385  let Predicates = [HasAVX512] in {
6386    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6387                                  _.info512, Ctrl.info512>, EVEX_V512;
6388  }
6389  let Predicates = [HasAVX512, HasVLX] in {
6390    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6391                                  _.info128, Ctrl.info128>, EVEX_V128;
6392    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6393                                  _.info256, Ctrl.info256>, EVEX_V256;
6394  }
6395}
6396
6397multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6398                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6399  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6400                                      _, Ctrl>;
6401  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6402                                    X86VPermilpi, SchedWriteFShuffle, _>,
6403                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6404}
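// Each VPERMIL* therefore gets two flavours from one multiclass: a
// vector-control form (OpcVar, sharing the variable-shift helpers) and an
// immediate-control form (OpcImm, sharing the shift-by-immediate helpers).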
6405
6406let ExeDomain = SSEPackedSingle in
6407defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6408                               avx512vl_i32_info>;
6409let ExeDomain = SSEPackedDouble in
6410defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6411                               avx512vl_i64_info>, REX_W;
6412
6413//===----------------------------------------------------------------------===//
6414// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6415//===----------------------------------------------------------------------===//
6416
6417defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6418                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6419                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6420defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6421                                  X86PShufhw, SchedWriteShuffle>,
6422                                  EVEX, AVX512XSIi8Base;
6423defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6424                                  X86PShuflw, SchedWriteShuffle>,
6425                                  EVEX, AVX512XDIi8Base;
6426
6427//===----------------------------------------------------------------------===//
6428// AVX-512 - VPSHUFB
6429//===----------------------------------------------------------------------===//
6430
6431multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6432                               X86SchedWriteWidths sched> {
6433  let Predicates = [HasBWI] in
6434  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6435                              EVEX_V512;
6436
6437  let Predicates = [HasVLX, HasBWI] in {
6438  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6439                              EVEX_V256;
6440  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6441                              EVEX_V128;
6442  }
6443}
6444
6445defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6446                                  SchedWriteVarShuffle>, WIG;
6447
6448//===----------------------------------------------------------------------===//
6449// Move Low to High and High to Low packed FP Instructions
6450//===----------------------------------------------------------------------===//
6451
6452def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6453          (ins VR128X:$src1, VR128X:$src2),
6454          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6455          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6456          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6457let isCommutable = 1 in
6458def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6459          (ins VR128X:$src1, VR128X:$src2),
6460          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6461          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6462          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
6463
6464//===----------------------------------------------------------------------===//
6465// VMOVHPS/PD VMOVLPS Instructions
6466// All patterns were taken from the SSE implementation.
6467//===----------------------------------------------------------------------===//
6468
6469multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6470                                  SDPatternOperator OpNode,
6471                                  X86VectorVTInfo _> {
6472  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6473  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6474                  (ins _.RC:$src1, f64mem:$src2),
6475                  !strconcat(OpcodeStr,
6476                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6477                  [(set _.RC:$dst,
6478                     (OpNode _.RC:$src1,
6479                       (_.VT (bitconvert
6480                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6481                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
6482}
6483
6484// No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created in
6485// SSE1. The MOVLPS pattern is even more complex.
6486defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6487                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6488defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6489                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6490defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6491                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
6492defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6493                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
6494
6495let Predicates = [HasAVX512] in {
6496  // VMOVHPD patterns
6497  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6498            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6499
6500  // VMOVLPD patterns
6501  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6502            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6503}
6504
6505let SchedRW = [WriteFStore] in {
6506let mayStore = 1, hasSideEffects = 0 in
6507def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6508                       (ins f64mem:$dst, VR128X:$src),
6509                       "vmovhps\t{$src, $dst|$dst, $src}",
6510                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
6511def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6512                       (ins f64mem:$dst, VR128X:$src),
6513                       "vmovhpd\t{$src, $dst|$dst, $src}",
6514                       [(store (f64 (extractelt
6515                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6516                                     (iPTR 0))), addr:$dst)]>,
6517                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6518let mayStore = 1, hasSideEffects = 0 in
6519def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6520                       (ins f64mem:$dst, VR128X:$src),
6521                       "vmovlps\t{$src, $dst|$dst, $src}",
6522                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
6523def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6524                       (ins f64mem:$dst, VR128X:$src),
6525                       "vmovlpd\t{$src, $dst|$dst, $src}",
6526                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6527                                     (iPTR 0))), addr:$dst)]>,
6528                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6529} // SchedRW
6530
6531let Predicates = [HasAVX512] in {
6532  // VMOVHPD patterns
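  // Storing element 1 of a v2f64 that a vpermilpd $1 shuffle has moved into
  // element 0 is done directly with vmovhpd.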
6533  def : Pat<(store (f64 (extractelt
6534                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6535                           (iPTR 0))), addr:$dst),
6536           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6537}
6538//===----------------------------------------------------------------------===//
6539// FMA - Fused Multiply Operations
6540//
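// In the 132/213/231 forms the digits name which operands (1 = dst/src1,
// 2 = src2, 3 = src3) are multiplied and which is added, i.e.
//   vfmadd132: dst = dst  * src3 + src2
//   vfmadd213: dst = src2 * dst  + src3
//   vfmadd231: dst = src2 * src3 + dst
// matching the operand order of the (OpNode ...) patterns below.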
6541
6542multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6543                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6544                               X86VectorVTInfo _> {
6545  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6546      Uses = [MXCSR], mayRaiseFPException = 1 in {
6547  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6548          (ins _.RC:$src2, _.RC:$src3),
6549          OpcodeStr, "$src3, $src2", "$src2, $src3",
6550          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6551          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6552          EVEX, VVVV, Sched<[sched]>;
6553
6554  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6555          (ins _.RC:$src2, _.MemOp:$src3),
6556          OpcodeStr, "$src3, $src2", "$src2, $src3",
6557          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6558          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6559          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6560                          sched.ReadAfterFold]>;
6561
6562  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6563            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6564            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6565            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6566            (OpNode _.RC:$src2,
6567             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6568            (MaskOpNode _.RC:$src2,
6569             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6570            EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6571                                    sched.ReadAfterFold]>;
6572  }
6573}
6574
6575multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6576                                 X86FoldableSchedWrite sched,
6577                                 X86VectorVTInfo _> {
6578  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6579      Uses = [MXCSR] in
6580  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6581          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6582          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6583          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6584          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6585          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6586}
6587
6588multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6589                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6590                                   X86SchedWriteWidths sched,
6591                                   AVX512VLVectorVTInfo _,
6592                                   Predicate prd = HasAVX512> {
6593  let Predicates = [prd] in {
6594    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6595                                      sched.ZMM, _.info512>,
6596                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6597                                        _.info512>,
6598                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6599  }
6600  let Predicates = [HasVLX, prd] in {
6601    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6602                                    sched.YMM, _.info256>,
6603                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6604    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6605                                    sched.XMM, _.info128>,
6606                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6607  }
6608}
6609
6610multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6611                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6612    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6613                                      OpNodeRnd, SchedWriteFMA,
6614                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6615    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6616                                      OpNodeRnd, SchedWriteFMA,
6617                                      avx512vl_f32_info>, T8, PD;
6618    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6619                                      OpNodeRnd, SchedWriteFMA,
6620                                      avx512vl_f64_info>, T8, PD, REX_W;
6621}
6622
6623defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6624                                       fma, X86FmaddRnd>;
6625defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6626                                       X86Fmsub, X86FmsubRnd>;
6627defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6628                                       X86Fmaddsub, X86FmaddsubRnd>;
6629defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6630                                       X86Fmsubadd, X86FmsubaddRnd>;
6631defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6632                                       X86Fnmadd, X86FnmaddRnd>;
6633defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6634                                       X86Fnmsub, X86FnmsubRnd>;
6635
6636
6637multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6638                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6639                               X86VectorVTInfo _> {
6640  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6641      Uses = [MXCSR], mayRaiseFPException = 1 in {
6642  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6643          (ins _.RC:$src2, _.RC:$src3),
6644          OpcodeStr, "$src3, $src2", "$src2, $src3",
6645          (null_frag),
6646          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6647          EVEX, VVVV, Sched<[sched]>;
6648
6649  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6650          (ins _.RC:$src2, _.MemOp:$src3),
6651          OpcodeStr, "$src3, $src2", "$src2, $src3",
6652          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6653          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6654          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6655                          sched.ReadAfterFold]>;
6656
6657  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6658         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6659         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6660         "$src2, ${src3}"#_.BroadcastStr,
6661         (_.VT (OpNode _.RC:$src2,
6662                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6663                      _.RC:$src1)),
6664         (_.VT (MaskOpNode _.RC:$src2,
6665                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6666                           _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
6667         Sched<[sched.Folded, sched.ReadAfterFold,
6668                sched.ReadAfterFold]>;
6669  }
6670}
6671
6672multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6673                                 X86FoldableSchedWrite sched,
6674                                 X86VectorVTInfo _> {
6675  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6676      Uses = [MXCSR] in
6677  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6678          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6679          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6680          (null_frag),
6681          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6682          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6683}
6684
6685multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6686                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6687                                   X86SchedWriteWidths sched,
6688                                   AVX512VLVectorVTInfo _,
6689                                   Predicate prd = HasAVX512> {
6690  let Predicates = [prd] in {
6691    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6692                                      sched.ZMM, _.info512>,
6693                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6694                                        _.info512>,
6695                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6696  }
6697  let Predicates = [HasVLX, prd] in {
6698    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6699                                    sched.YMM, _.info256>,
6700                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6701    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6702                                    sched.XMM, _.info128>,
6703                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6704  }
6705}
6706
6707multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6708                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6709    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6710                                      OpNodeRnd, SchedWriteFMA,
6711                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6712    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6713                                      OpNodeRnd, SchedWriteFMA,
6714                                      avx512vl_f32_info>, T8, PD;
6715    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6716                                      OpNodeRnd, SchedWriteFMA,
6717                                      avx512vl_f64_info>, T8, PD, REX_W;
6718}
6719
6720defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6721                                       fma, X86FmaddRnd>;
6722defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6723                                       X86Fmsub, X86FmsubRnd>;
6724defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6725                                       X86Fmaddsub, X86FmaddsubRnd>;
6726defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6727                                       X86Fmsubadd, X86FmsubaddRnd>;
6728defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6729                                       X86Fnmadd, X86FnmaddRnd>;
6730defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6731                                       X86Fnmsub, X86FnmsubRnd>;
6732
6733multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6734                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6735                               X86VectorVTInfo _> {
6736  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6737      Uses = [MXCSR], mayRaiseFPException = 1 in {
6738  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6739          (ins _.RC:$src2, _.RC:$src3),
6740          OpcodeStr, "$src3, $src2", "$src2, $src3",
6741          (null_frag),
6742          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6743          EVEX, VVVV, Sched<[sched]>;
6744
6745  // The pattern is in 312 order so that the load is in a different place from
6746  // the 213 and 231 patterns; this helps TableGen's duplicate pattern detection.
6747  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6748          (ins _.RC:$src2, _.MemOp:$src3),
6749          OpcodeStr, "$src3, $src2", "$src2, $src3",
6750          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6751          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6752          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
6753                          sched.ReadAfterFold]>;
6754
6755  // The pattern is in 312 order so that the load is in a different place from
6756  // the 213 and 231 patterns; this helps TableGen's duplicate pattern detection.
6757  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6758         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6759         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6760         "$src2, ${src3}"#_.BroadcastStr,
6761         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6762                       _.RC:$src1, _.RC:$src2)),
6763         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6764                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6765         EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6766                                 sched.ReadAfterFold]>;
6767  }
6768}
6769
6770multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6771                                 X86FoldableSchedWrite sched,
6772                                 X86VectorVTInfo _> {
6773  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6774      Uses = [MXCSR] in
6775  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6776          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6777          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6778          (null_frag),
6779          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6780          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
6781}
6782
6783multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6784                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6785                                   X86SchedWriteWidths sched,
6786                                   AVX512VLVectorVTInfo _,
6787                                   Predicate prd = HasAVX512> {
6788  let Predicates = [prd] in {
6789    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6790                                      sched.ZMM, _.info512>,
6791                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6792                                        _.info512>,
6793                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6794  }
6795  let Predicates = [HasVLX, prd] in {
6796    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6797                                    sched.YMM, _.info256>,
6798                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6799    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6800                                    sched.XMM, _.info128>,
6801                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6802  }
6803}
6804
6805multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6806                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6807    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6808                                      OpNodeRnd, SchedWriteFMA,
6809                                      avx512vl_f16_info, HasFP16>, T_MAP6, PD;
6810    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6811                                      OpNodeRnd, SchedWriteFMA,
6812                                      avx512vl_f32_info>, T8, PD;
6813    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6814                                      OpNodeRnd, SchedWriteFMA,
6815                                      avx512vl_f64_info>, T8, PD, REX_W;
6816}
6817
6818defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
6819                                       fma, X86FmaddRnd>;
6820defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6821                                       X86Fmsub, X86FmsubRnd>;
6822defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6823                                       X86Fmaddsub, X86FmaddsubRnd>;
6824defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6825                                       X86Fmsubadd, X86FmsubaddRnd>;
6826defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6827                                       X86Fnmadd, X86FnmaddRnd>;
6828defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6829                                       X86Fnmsub, X86FnmsubRnd>;
6830
6831// Scalar FMA
6832multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6833                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6834let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6835  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6836          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6837          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6838          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6839
6840  let mayLoad = 1 in
6841  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6842          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6843          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6844          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6845                          SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6846
6847  let Uses = [MXCSR] in
6848  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6849         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6850         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6851         EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6852
6853  let isCodeGenOnly = 1, isCommutable = 1 in {
6854    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6855                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6856                     !strconcat(OpcodeStr,
6857                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6858                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
6859    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
6860                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6861                    !strconcat(OpcodeStr,
6862                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6863                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
6864                                     SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;
6865
6866    let Uses = [MXCSR] in
6867    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
6868                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6869                     !strconcat(OpcodeStr,
6870                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6871                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6872                     Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
6873  }// isCodeGenOnly = 1
6874}// Constraints = "$src1 = $dst"
6875}
6876
6877multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6878                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
6879                            X86VectorVTInfo _, string SUFF> {
6880  let ExeDomain = _.ExeDomain in {
6881  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6882                // Operands for the intrinsic are in 123 order to preserve passthru
6883                // semantics.
6884                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6885                         _.FRC:$src3))),
6886                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6887                         (_.ScalarLdFrag addr:$src3)))),
6888                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6889                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
6890
6891  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6892                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6893                                          _.FRC:$src1))),
6894                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6895                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6896                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6897                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
6898
6899  // One pattern is in 312 order so that the load is in a different place from
6900  // the 213 and 231 patterns; this helps TableGen's duplicate pattern detection.
6901  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6902                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6903                         _.FRC:$src2))),
6904                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6905                                 _.FRC:$src1, _.FRC:$src2))),
6906                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6907                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
6908  }
6909}
6910
6911multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6912                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
6913  let Predicates = [HasAVX512] in {
6914    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6915                                 OpNodeRnd, f32x_info, "SS">,
6916                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
6917    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6918                                 OpNodeRnd, f64x_info, "SD">,
6919                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
6920  }
6921  let Predicates = [HasFP16] in {
6922    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6923                                 OpNodeRnd, f16x_info, "SH">,
6924                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
6925  }
6926}
6927
6928defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
6929defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6930defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6931defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6932
6933multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
6934                                      SDNode RndOp, string Prefix,
6935                                      string Suffix, SDNode Move,
6936                                      X86VectorVTInfo _, PatLeaf ZeroFP,
6937                                      Predicate prd = HasAVX512> {
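  // These patterns recognise scalar FMA written on FRC values and merged back
  // into an XMM register through the Move (blend-to-element-0) node, and
  // select the *_Int instruction forms so the upper elements of $src1 are
  // preserved.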
6938  let Predicates = [prd] in {
6939    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6940                (Op _.FRC:$src2,
6941                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                    _.FRC:$src3))))),
6943              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6944               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6945               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6946
6947    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6948                (Op _.FRC:$src2, _.FRC:$src3,
6949                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6950              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6951               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6952               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6953
6954    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6955                (Op _.FRC:$src2,
6956                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6957                    (_.ScalarLdFrag addr:$src3)))))),
6958              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6959               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6960               addr:$src3)>;
6961
6962    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6963                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6964                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6965              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6966               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6967               addr:$src3)>;
6968
6969    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6970                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6971                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6972              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6973               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6974               addr:$src3)>;
6975
6976    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6977               (X86selects_mask VK1WM:$mask,
6978                (MaskedOp _.FRC:$src2,
6979                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6980                    _.FRC:$src3),
6981                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6982              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6983               VR128X:$src1, VK1WM:$mask,
6984               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6985               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6986
6987    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6988               (X86selects_mask VK1WM:$mask,
6989                (MaskedOp _.FRC:$src2,
6990                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6991                    (_.ScalarLdFrag addr:$src3)),
6992                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6993              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6994               VR128X:$src1, VK1WM:$mask,
6995               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6996
6997    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6998               (X86selects_mask VK1WM:$mask,
6999                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7000                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7001                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7002              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7003               VR128X:$src1, VK1WM:$mask,
7004               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7005
7006    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7007               (X86selects_mask VK1WM:$mask,
7008                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7009                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7010                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7011              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7012               VR128X:$src1, VK1WM:$mask,
7013               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7014               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7015
7016    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7017               (X86selects_mask VK1WM:$mask,
7018                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7019                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7020                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7021              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7022               VR128X:$src1, VK1WM:$mask,
7023               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7024
7025    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7026               (X86selects_mask VK1WM:$mask,
7027                (MaskedOp _.FRC:$src2,
7028                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7029                          _.FRC:$src3),
7030                (_.EltVT ZeroFP)))))),
7031              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7032               VR128X:$src1, VK1WM:$mask,
7033               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7034               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7035
7036    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7037               (X86selects_mask VK1WM:$mask,
7038                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7039                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7040                (_.EltVT ZeroFP)))))),
7041              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7042               VR128X:$src1, VK1WM:$mask,
7043               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7044               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7045
7046    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7047               (X86selects_mask VK1WM:$mask,
7048                (MaskedOp _.FRC:$src2,
7049                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7050                          (_.ScalarLdFrag addr:$src3)),
7051                (_.EltVT ZeroFP)))))),
7052              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7053               VR128X:$src1, VK1WM:$mask,
7054               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7055
7056    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7057               (X86selects_mask VK1WM:$mask,
7058                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7059                          _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7060                (_.EltVT ZeroFP)))))),
7061              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7062               VR128X:$src1, VK1WM:$mask,
7063               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7064
7065    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7066               (X86selects_mask VK1WM:$mask,
7067                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7068                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7069                (_.EltVT ZeroFP)))))),
7070              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7071               VR128X:$src1, VK1WM:$mask,
7072               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7073
7074    // Patterns with rounding mode.
7075    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7076                (RndOp _.FRC:$src2,
7077                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7078                       _.FRC:$src3, (i32 timm:$rc)))))),
7079              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7080               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7081               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7082
7083    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7084                (RndOp _.FRC:$src2, _.FRC:$src3,
7085                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7086                       (i32 timm:$rc)))))),
7087              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7088               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7089               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7090
7091    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7092               (X86selects_mask VK1WM:$mask,
7093                (RndOp _.FRC:$src2,
7094                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7095                       _.FRC:$src3, (i32 timm:$rc)),
7096                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7097              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7098               VR128X:$src1, VK1WM:$mask,
7099               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7100               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7101
7102    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7103               (X86selects_mask VK1WM:$mask,
7104                (RndOp _.FRC:$src2, _.FRC:$src3,
7105                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7106                       (i32 timm:$rc)),
7107                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7108              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7109               VR128X:$src1, VK1WM:$mask,
7110               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7111               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7112
7113    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7114               (X86selects_mask VK1WM:$mask,
7115                (RndOp _.FRC:$src2,
7116                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7117                       _.FRC:$src3, (i32 timm:$rc)),
7118                (_.EltVT ZeroFP)))))),
7119              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7120               VR128X:$src1, VK1WM:$mask,
7121               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7122               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7123
7124    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7125               (X86selects_mask VK1WM:$mask,
7126                (RndOp _.FRC:$src2, _.FRC:$src3,
7127                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7128                       (i32 timm:$rc)),
7129                (_.EltVT ZeroFP)))))),
7130              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7131               VR128X:$src1, VK1WM:$mask,
7132               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7133               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7134  }
7135}
7136defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7137                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7138defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7139                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7140defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7141                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7142defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7143                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7144
7145defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7146                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7147defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7148                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7149defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7150                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7151defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7152                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7153
7154defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7155                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7156defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7157                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7158defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7159                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7160defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7161                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7162
7163//===----------------------------------------------------------------------===//
7164// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7165//===----------------------------------------------------------------------===//
7166let Constraints = "$src1 = $dst" in {
7167multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7168                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7169  // NOTE: The SDNode have the multiply operands first with the add last.
7170  // This enables commuted load patterns to be autogenerated by tablegen.
7171  let ExeDomain = _.ExeDomain in {
7172  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7173          (ins _.RC:$src2, _.RC:$src3),
7174          OpcodeStr, "$src3, $src2", "$src2, $src3",
7175          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7176          T8, PD, EVEX, VVVV, Sched<[sched]>;
7177
7178  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7179          (ins _.RC:$src2, _.MemOp:$src3),
7180          OpcodeStr, "$src3, $src2", "$src2, $src3",
7181          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7182          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
7183                                sched.ReadAfterFold]>;
7184
7185  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7186            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7187            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
7188            !strconcat("$src2, ${src3}", _.BroadcastStr),
7189            (OpNode _.RC:$src2,
7190                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7191                    _.RC:$src1)>,
7192            T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7193                                          sched.ReadAfterFold]>;
7194  }
7195}
7196} // Constraints = "$src1 = $dst"
7197
7198multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7199                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7200  let Predicates = [HasIFMA] in {
7201    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7202                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7203  }
7204  let Predicates = [HasVLX, HasIFMA] in {
7205    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7206                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7207    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7208                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7209  }
7210}
7211
7212defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7213                                         SchedWriteVecIMul, avx512vl_i64_info>,
7214                                         REX_W;
7215defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7216                                         SchedWriteVecIMul, avx512vl_i64_info>,
7217                                         REX_W;
7218
7219//===----------------------------------------------------------------------===//
7220// AVX-512  Scalar convert from signed integer to float/double
7221//===----------------------------------------------------------------------===//
7222
7223multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7224                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7225                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7226                    string mem, list<Register> _Uses = [MXCSR],
7227                    bit _mayRaiseFPException = 1> {
7228let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7229    mayRaiseFPException = _mayRaiseFPException in {
7230  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7231    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7232              (ins DstVT.FRC:$src1, SrcRC:$src),
7233              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7234              EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7235    let mayLoad = 1 in
7236      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7237              (ins DstVT.FRC:$src1, x86memop:$src),
7238              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7239              EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7240  } // hasSideEffects = 0, isCodeGenOnly = 1
7241  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7242                (ins DstVT.RC:$src1, SrcRC:$src2),
7243                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7244                [(set DstVT.RC:$dst,
7245                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7246               EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7247
7248  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7249                (ins DstVT.RC:$src1, x86memop:$src2),
7250                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7251                [(set DstVT.RC:$dst,
7252                      (OpNode (DstVT.VT DstVT.RC:$src1),
7253                               (ld_frag addr:$src2)))]>,
7254                EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
7255}
7256  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7257                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7258                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7259}
7260
7261multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7262                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7263                               X86VectorVTInfo DstVT, string asm,
7264                               string mem> {
7265  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7266  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7267              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7268              !strconcat(asm,
7269                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7270              [(set DstVT.RC:$dst,
7271                    (OpNode (DstVT.VT DstVT.RC:$src1),
7272                             SrcRC:$src2,
7273                             (i32 timm:$rc)))]>,
7274              EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7275  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7276                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7277                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7278}
7279
7280multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7281                                X86FoldableSchedWrite sched,
7282                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7283                                X86MemOperand x86memop, PatFrag ld_frag,
7284                                string asm, string mem> {
7285  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7286              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7287                            ld_frag, asm, mem>, VEX_LIG;
7288}
7289
7290let Predicates = [HasAVX512] in {
7291defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7292                                 WriteCvtI2SS, GR32,
7293                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7294                                 TB, XS, EVEX_CD8<32, CD8VT1>;
7295defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7296                                 WriteCvtI2SS, GR64,
7297                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7298                                 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7299defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7300                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7301                                 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7302defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7303                                 WriteCvtI2SD, GR64,
7304                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7305                                 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7306
7307def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7308              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7309def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7310              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7311
7312def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7313          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7314def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7315          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7316def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7317          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7318def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7319          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7320
7321def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7322          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7323def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7324          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7325def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7326          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7327def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7328          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7329
7330defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7331                                  WriteCvtI2SS, GR32,
7332                                  v4f32x_info, i32mem, loadi32,
7333                                  "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
7334defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7335                                  WriteCvtI2SS, GR64,
7336                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7337                                  TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
7338defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7339                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7340                                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7341defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7342                                  WriteCvtI2SD, GR64,
7343                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7344                                  TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7345
7346def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7347              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7348def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7349              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7350
7351def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7352          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7353def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7354          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7355def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7356          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7357def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7358          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7359
7360def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7361          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7362def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7363          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7364def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7365          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7366def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7367          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7368}
7369
7370//===----------------------------------------------------------------------===//
7371// AVX-512  Scalar convert from float/double to integer
7372//===----------------------------------------------------------------------===//
7373
7374multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7375                                  X86VectorVTInfo DstVT, SDNode OpNode,
7376                                  SDNode OpNodeRnd,
7377                                  X86FoldableSchedWrite sched, string asm,
7378                                  string aliasStr, Predicate prd = HasAVX512> {
7379  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7380    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7381                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7382                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7383                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7384    let Uses = [MXCSR] in
7385    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7386                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7387                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7388                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7389                 Sched<[sched]>;
7390    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7391                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7392                [(set DstVT.RC:$dst, (OpNode
7393                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7394                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7395  } // Predicates = [prd]
7396
7397  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7398          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7399  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7400          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7401  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7402          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7403                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7404}
7405
7406// Convert float/double to signed/unsigned int 32/64
7407defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
7408                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7409                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7410defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7411                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7412                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7413defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7414                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7415                                   TB, XS, EVEX_CD8<32, CD8VT1>;
7416defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7417                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7418                                   TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7419defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7420                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7421                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7422defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7423                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7424                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7425defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7426                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7427                                   TB, XD, EVEX_CD8<64, CD8VT1>;
7428defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7429                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7430                                   TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7431
7432multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7433                        X86VectorVTInfo DstVT, SDNode OpNode,
7434                        X86FoldableSchedWrite sched> {
7435  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7436    let isCodeGenOnly = 1 in {
7437    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7438                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7439                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7440                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7441    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7442                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7443                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7444                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7445    }
7446  } // Predicates = [HasAVX512]
7447}
7448
7449defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7450                       lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
7451defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7452                       llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7453defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7454                       lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
7455defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7456                       llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7457
7458let Predicates = [HasAVX512] in {
7459  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7460  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7461
7462  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7463  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7464}
7465
7466// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
7467// which would otherwise produce unnecessary vmovs{s,d} instructions.
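// For example, the first pattern below matches
//   (v4f32 (X86Movss (v4f32 VR128X:$dst),
//            (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src))))))
// and selects
//   (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)
// so the blend of the converted scalar into $dst is handled by the convert
// instruction itself rather than by a separate vmovss.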
7468let Predicates = [HasAVX512] in {
7469def : Pat<(v4f32 (X86Movss
7470                   (v4f32 VR128X:$dst),
7471                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7472          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7473
7474def : Pat<(v4f32 (X86Movss
7475                   (v4f32 VR128X:$dst),
7476                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7477          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7478
7479def : Pat<(v4f32 (X86Movss
7480                   (v4f32 VR128X:$dst),
7481                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7482          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7483
7484def : Pat<(v4f32 (X86Movss
7485                   (v4f32 VR128X:$dst),
7486                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7487          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7488
7489def : Pat<(v2f64 (X86Movsd
7490                   (v2f64 VR128X:$dst),
7491                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7492          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7493
7494def : Pat<(v2f64 (X86Movsd
7495                   (v2f64 VR128X:$dst),
7496                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7497          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7498
7499def : Pat<(v2f64 (X86Movsd
7500                   (v2f64 VR128X:$dst),
7501                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7502          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7503
7504def : Pat<(v2f64 (X86Movsd
7505                   (v2f64 VR128X:$dst),
7506                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7507          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7508
7509def : Pat<(v4f32 (X86Movss
7510                   (v4f32 VR128X:$dst),
7511                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7512          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7513
7514def : Pat<(v4f32 (X86Movss
7515                   (v4f32 VR128X:$dst),
7516                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7517          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7518
7519def : Pat<(v4f32 (X86Movss
7520                   (v4f32 VR128X:$dst),
7521                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7522          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7523
7524def : Pat<(v4f32 (X86Movss
7525                   (v4f32 VR128X:$dst),
7526                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7527          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7528
7529def : Pat<(v2f64 (X86Movsd
7530                   (v2f64 VR128X:$dst),
7531                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7532          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7533
7534def : Pat<(v2f64 (X86Movsd
7535                   (v2f64 VR128X:$dst),
7536                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7537          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7538
7539def : Pat<(v2f64 (X86Movsd
7540                   (v2f64 VR128X:$dst),
7541                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7542          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7543
7544def : Pat<(v2f64 (X86Movsd
7545                   (v2f64 VR128X:$dst),
7546                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7547          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7548} // Predicates = [HasAVX512]
7549
7550// Convert float/double to signed/unsigned int 32/64 with truncation
7551multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7552                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7553                            SDNode OpNodeInt, SDNode OpNodeSAE,
7554                            X86FoldableSchedWrite sched, string aliasStr,
7555                            Predicate prd = HasAVX512> {
7556let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7557  let isCodeGenOnly = 1 in {
7558  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7559              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7560              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7561              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7562  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7563              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7564              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7565              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7566  }
7567
7568  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7569            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7570           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7571           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7572  let Uses = [MXCSR] in
7573  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7574            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7575            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7576                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7577  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7578              (ins _SrcRC.IntScalarMemOp:$src),
7579              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7580              [(set _DstRC.RC:$dst,
7581                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7582              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7583} // Predicates = [prd]
7584
7585  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7586          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7587  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7588          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7589  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7590          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7591                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7592}
7593
7594defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7595                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7596                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7597defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7598                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7599                        "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
7600defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7601                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7602                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7603defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7604                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7605                        "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;
7606
7607defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7608                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7609                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
7610defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7611                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7612                        "{q}">, TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
7613defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7614                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7615                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
7616defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7617                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7618                        "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7619
7620//===----------------------------------------------------------------------===//
7621// AVX-512  Convert form float to double and back
7622//===----------------------------------------------------------------------===//
7623
7624let Uses = [MXCSR], mayRaiseFPException = 1 in
7625multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7626                                X86VectorVTInfo _Src, SDNode OpNode,
7627                                X86FoldableSchedWrite sched> {
7628  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7629                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7630                         "$src2, $src1", "$src1, $src2",
7631                         (_.VT (OpNode (_.VT _.RC:$src1),
7632                                       (_Src.VT _Src.RC:$src2)))>,
7633                         EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7634  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7635                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7636                         "$src2, $src1", "$src1, $src2",
7637                         (_.VT (OpNode (_.VT _.RC:$src1),
7638                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7639                         EVEX, VVVV, VEX_LIG,
7640                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7641
7642  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7643    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7644               (ins _.FRC:$src1, _Src.FRC:$src2),
7645               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7646               EVEX, VVVV, VEX_LIG, Sched<[sched]>;
7647    let mayLoad = 1 in
7648    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7649               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7650               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7651               EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7652  }
7653}
7654
7655// Scalar Conversion with SAE - suppress all exceptions
7656multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7657                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7658                                    X86FoldableSchedWrite sched> {
7659  let Uses = [MXCSR] in
7660  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7661                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7662                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7663                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7664                                         (_Src.VT _Src.RC:$src2)))>,
7665                        EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
7666}
7667
7668// Scalar Conversion with rounding control (RC)
7669multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7670                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7671                                   X86FoldableSchedWrite sched> {
7672  let Uses = [MXCSR] in
7673  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7674                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7675                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7676                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7677                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7678                        EVEX, VVVV, VEX_LIG, Sched<[sched]>,
7679                        EVEX_B, EVEX_RC;
7680}
7681multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7682                                      SDNode OpNode, SDNode OpNodeRnd,
7683                                      X86FoldableSchedWrite sched,
7684                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
7685                                      Predicate prd = HasAVX512> {
7686  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7687    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7688             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7689                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7690  }
7691}
7692
7693multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7694                                       SDNode OpNode, SDNode OpNodeSAE,
7695                                       X86FoldableSchedWrite sched,
7696                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7697                                       Predicate prd = HasAVX512> {
7698  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7699    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7700             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7701             EVEX_CD8<_src.EltSize, CD8VT1>;
7702  }
7703}
7704defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7705                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7706                                         f32x_info>, TB, XD, REX_W;
7707defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7708                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7709                                          f64x_info>, TB, XS;
7710defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7711                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7712                                          f16x_info, HasFP16>, T_MAP5, XD, REX_W;
7713defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7714                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7715                                          f64x_info, HasFP16>, T_MAP5, XS;
7716defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7717                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7718                                          f16x_info, HasFP16>, T_MAP5;
7719defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7720                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7721                                          f32x_info, HasFP16>, T_MAP6;
7722
7723def : Pat<(f64 (any_fpextend FR32X:$src)),
7724          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7725          Requires<[HasAVX512]>;
7726def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7727          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7728          Requires<[HasAVX512, OptForSize]>;
7729
7730def : Pat<(f32 (any_fpround FR64X:$src)),
7731          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7732           Requires<[HasAVX512]>;
7733
7734def : Pat<(f32 (any_fpextend FR16X:$src)),
7735          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7736          Requires<[HasFP16]>;
7737def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7738          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7739          Requires<[HasFP16, OptForSize]>;
7740
7741def : Pat<(f64 (any_fpextend FR16X:$src)),
7742          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7743          Requires<[HasFP16]>;
7744def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7745          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7746          Requires<[HasFP16, OptForSize]>;
7747
7748def : Pat<(f16 (any_fpround FR32X:$src)),
7749          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7750           Requires<[HasFP16]>;
7751def : Pat<(f16 (any_fpround FR64X:$src)),
7752          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7753           Requires<[HasFP16]>;
7754
7755def : Pat<(v4f32 (X86Movss
7756                   (v4f32 VR128X:$dst),
7757                   (v4f32 (scalar_to_vector
7758                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7759          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7760          Requires<[HasAVX512]>;
7761
7762def : Pat<(v2f64 (X86Movsd
7763                   (v2f64 VR128X:$dst),
7764                   (v2f64 (scalar_to_vector
7765                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7766          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7767          Requires<[HasAVX512]>;
7768
7769//===----------------------------------------------------------------------===//
7770// AVX-512  Vector convert from signed/unsigned integer to float/double
7771//          and from float/double to signed/unsigned integer
7772//===----------------------------------------------------------------------===//
7773
7774multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7775                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7776                          X86FoldableSchedWrite sched,
7777                          string Broadcast = _.BroadcastStr,
7778                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7779                          RegisterClass MaskRC = _.KRCWM,
7780                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7781                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7782let Uses = [MXCSR], mayRaiseFPException = 1 in {
7783  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7784                         (ins _Src.RC:$src),
7785                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7786                         (ins MaskRC:$mask, _Src.RC:$src),
7787                          OpcodeStr, "$src", "$src",
7788                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7789                         (vselect_mask MaskRC:$mask,
7790                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7791                                       _.RC:$src0),
7792                         (vselect_mask MaskRC:$mask,
7793                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7794                                       _.ImmAllZerosV)>,
7795                         EVEX, Sched<[sched]>;
7796
7797  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7798                         (ins MemOp:$src),
7799                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7800                         (ins MaskRC:$mask, MemOp:$src),
7801                         OpcodeStr#Alias, "$src", "$src",
7802                         LdDAG,
7803                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7804                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7805                         EVEX, Sched<[sched.Folded]>;
7806
7807  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7808                         (ins _Src.ScalarMemOp:$src),
7809                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7810                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7811                         OpcodeStr,
7812                         "${src}"#Broadcast, "${src}"#Broadcast,
7813                         (_.VT (OpNode (_Src.VT
7814                                  (_Src.BroadcastLdFrag addr:$src))
7815                            )),
7816                         (vselect_mask MaskRC:$mask,
7817                                       (_.VT
7818                                        (MaskOpNode
7819                                         (_Src.VT
7820                                          (_Src.BroadcastLdFrag addr:$src)))),
7821                                       _.RC:$src0),
7822                         (vselect_mask MaskRC:$mask,
7823                                       (_.VT
7824                                        (MaskOpNode
7825                                         (_Src.VT
7826                                          (_Src.BroadcastLdFrag addr:$src)))),
7827                                       _.ImmAllZerosV)>,
7828                         EVEX, EVEX_B, Sched<[sched.Folded]>;
7829  }
7830}
7831// Conversion with SAE - suppress all exceptions
7832multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7833                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
7834                              X86FoldableSchedWrite sched> {
7835  let Uses = [MXCSR] in
7836  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837                        (ins _Src.RC:$src), OpcodeStr,
7838                        "{sae}, $src", "$src, {sae}",
7839                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7840                        EVEX, EVEX_B, Sched<[sched]>;
7841}
7842
7843// Conversion with rounding control (RC)
7844multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7845                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
7846                         X86FoldableSchedWrite sched> {
7847  let Uses = [MXCSR] in
7848  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7849                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7850                        "$rc, $src", "$src, $rc",
7851                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7852                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7853}
7854
7855// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
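// For example, the v8f16 -> v8f64 instantiation further below ends up with
// (v8f64 (extloadv8f16 addr:$src)) as its ZMM memory pattern, so a widening
// load of the narrower source elements folds straight into the convert.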
7856multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7857                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
7858                                SDNode MaskOpNode,
7859                                X86FoldableSchedWrite sched,
7860                                string Broadcast = _.BroadcastStr,
7861                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7862                                RegisterClass MaskRC = _.KRCWM>
7863  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7864                   Alias, MemOp, MaskRC,
7865                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7866                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7867
7868// Extend [Float to Double, Half to Float]
7869multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
7870                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7871                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
7872  let Predicates = [prd] in {
7873    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
7874                            any_fpextend, fpextend, sched.ZMM>,
7875             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
7876                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7877  }
7878  let Predicates = [prd, HasVLX] in {
7879    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
7880                               X86any_vfpext, X86vfpext, sched.XMM,
7881                               _dst.info128.BroadcastStr,
7882                               "", f64mem>, EVEX_V128;
7883    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
7884                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7885  }
7886}
7887
7888// Truncate [Double to Float, Float to Half]
7889multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
7890                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
7891                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
7892                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
7893                            PatFrag loadVT128 = _src.info128.LdFrag,
7894                            RegisterClass maskRC128 = _src.info128.KRCWM> {
7895  let Predicates = [prd] in {
7896    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
7897                            X86any_vfpround, X86vfpround, sched.ZMM>,
7898             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
7899                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
7900  }
7901  let Predicates = [prd, HasVLX] in {
7902    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
7903                               null_frag, null_frag, sched.XMM,
7904                               _src.info128.BroadcastStr, "{x}",
7905                               f128mem, maskRC128>, EVEX_V128;
7906    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
7907                               X86any_vfpround, X86vfpround,
7908                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
7909
7910    // Special patterns to allow use of X86vmfpround for masking. Instruction
7911    // patterns have been disabled with null_frag.
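    // For instance, once this multiclass is instantiated as VCVTPD2PS below,
    // the second pattern here reads roughly
    //   (X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0), VK2WM:$mask)
    //     --> (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)
    // i.e. the explicit passthru operand of X86vmfpround becomes the tied
    // destination of the masked 128-bit instruction.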
7912    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
7913              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
7914    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
7915                            maskRC128:$mask),
7916              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
7917    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
7918                            maskRC128:$mask),
7919              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
7920
7921    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
7922              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
7923    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
7924                            maskRC128:$mask),
7925              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7926    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
7927                            maskRC128:$mask),
7928              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
7929
7930    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
7931              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
7932    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7933                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
7934              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
7935    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
7936                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
7937              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
7938  }
7939
7940  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7941                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7942  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7943                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7944                  VK2WM:$mask, VR128X:$src), 0, "att">;
7945  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7946                  "$dst {${mask}} {z}, $src}",
7947                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7948                  VK2WM:$mask, VR128X:$src), 0, "att">;
7949  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7950                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7951  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7952                  "$dst {${mask}}, ${src}{1to2}}",
7953                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7954                  VK2WM:$mask, f64mem:$src), 0, "att">;
7955  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7956                  "$dst {${mask}} {z}, ${src}{1to2}}",
7957                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7958                  VK2WM:$mask, f64mem:$src), 0, "att">;
7959
7960  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7961                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7962  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7963                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7964                  VK4WM:$mask, VR256X:$src), 0, "att">;
7965  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7966                  "$dst {${mask}} {z}, $src}",
7967                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7968                  VK4WM:$mask, VR256X:$src), 0, "att">;
7969  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7970                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7971  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7972                  "$dst {${mask}}, ${src}{1to4}}",
7973                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7974                  VK4WM:$mask, f64mem:$src), 0, "att">;
7975  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7976                  "$dst {${mask}} {z}, ${src}{1to4}}",
7977                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7978                  VK4WM:$mask, f64mem:$src), 0, "att">;
7979}
7980
7981defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
7982                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
7983                                  REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
7984defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
7985                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
7986                                   TB, EVEX_CD8<32, CD8VH>;
7987
7988// Extend Half to Double
7989multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
7990                            X86SchedWriteWidths sched> {
7991  let Predicates = [HasFP16] in {
7992    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
7993                                  any_fpextend, fpextend, sched.ZMM>,
7994             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
7995                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
7996    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
7997                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
7998  }
7999  let Predicates = [HasFP16, HasVLX] in {
8000    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8001                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8002                                     f32mem>, EVEX_V128;
8003    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8004                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8005                                     f64mem>, EVEX_V256;
8006  }
8007}
8008
8009// Truncate Double to Half
8010multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8011  let Predicates = [HasFP16] in {
8012    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8013                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8014             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8015                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8016  }
8017  let Predicates = [HasFP16, HasVLX] in {
8018    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8019                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8020                               VK2WM>, EVEX_V128;
8021    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8022                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8023                               VK4WM>, EVEX_V256;
8024  }
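  // vcvtpd2ph compresses every source width into an XMM destination, so the
  // AT&T aliases below need "x"/"y"/"z" suffixes to select the memory-operand
  // width. Illustrative examples:
  //   vcvtpd2phx (%rax), %xmm0    # 128-bit (v2f64) memory source
  //   vcvtpd2phy (%rax), %xmm0    # 256-bit (v4f64) memory source
  //   vcvtpd2phz (%rax), %xmm0    # 512-bit (v8f64) memory source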
8025  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8026                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8027                  VR128X:$src), 0, "att">;
8028  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8029                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8030                  VK2WM:$mask, VR128X:$src), 0, "att">;
8031  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8032                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8033                  VK2WM:$mask, VR128X:$src), 0, "att">;
8034  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8035                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8036                  i64mem:$src), 0, "att">;
8037  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8038                  "$dst {${mask}}, ${src}{1to2}}",
8039                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8040                  VK2WM:$mask, i64mem:$src), 0, "att">;
8041  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8042                  "$dst {${mask}} {z}, ${src}{1to2}}",
8043                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8044                  VK2WM:$mask, i64mem:$src), 0, "att">;
8045
8046  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8047                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8048                  VR256X:$src), 0, "att">;
8049  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8050                  "$dst {${mask}}, $src}",
8051                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8052                  VK4WM:$mask, VR256X:$src), 0, "att">;
8053  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8054                  "$dst {${mask}} {z}, $src}",
8055                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8056                  VK4WM:$mask, VR256X:$src), 0, "att">;
8057  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8058                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8059                  i64mem:$src), 0, "att">;
8060  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8061                  "$dst {${mask}}, ${src}{1to4}}",
8062                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8063                  VK4WM:$mask, i64mem:$src), 0, "att">;
8064  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8065                  "$dst {${mask}} {z}, ${src}{1to4}}",
8066                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8067                  VK4WM:$mask, i64mem:$src), 0, "att">;
8068
8069  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8070                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8071                  VR512:$src), 0, "att">;
8072  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8073                  "$dst {${mask}}, $src}",
8074                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8075                  VK8WM:$mask, VR512:$src), 0, "att">;
8076  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8077                  "$dst {${mask}} {z}, $src}",
8078                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8079                  VK8WM:$mask, VR512:$src), 0, "att">;
8080  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8081                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8082                  i64mem:$src), 0, "att">;
8083  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8084                  "$dst {${mask}}, ${src}{1to8}}",
8085                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8086                  VK8WM:$mask, i64mem:$src), 0, "att">;
8087  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8088                  "$dst {${mask}} {z}, ${src}{1to8}}",
8089                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8090                  VK8WM:$mask, i64mem:$src), 0, "att">;
8091}
8092
8093defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8094                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8095                                   HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
8096defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8097                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8098                                    HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
8099defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8100                                 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
8101defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8102                                 T_MAP5, EVEX_CD8<16, CD8VQ>;
8103
8104let Predicates = [HasFP16, HasVLX] in {
8105  // Special patterns to allow use of X86vmfpround for masking. Instruction
8106  // patterns have been disabled with null_frag.
8107  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8108            (VCVTPD2PHZ256rr VR256X:$src)>;
8109  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8110                          VK4WM:$mask)),
8111            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8112  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8113                          VK4WM:$mask),
8114            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8115
8116  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8117            (VCVTPD2PHZ256rm addr:$src)>;
8118  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8119                          VK4WM:$mask),
8120            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8121  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8122                          VK4WM:$mask),
8123            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8124
8125  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8126            (VCVTPD2PHZ256rmb addr:$src)>;
8127  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8128                          (v8f16 VR128X:$src0), VK4WM:$mask),
8129            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8130  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8131                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8132            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8133
8134  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8135            (VCVTPD2PHZ128rr VR128X:$src)>;
8136  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8137                          VK2WM:$mask),
8138            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8139  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8140                          VK2WM:$mask),
8141            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8142
8143  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8144            (VCVTPD2PHZ128rm addr:$src)>;
8145  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8146                          VK2WM:$mask),
8147            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8148  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8149                          VK2WM:$mask),
8150            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8151
8152  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8153            (VCVTPD2PHZ128rmb addr:$src)>;
8154  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8155                          (v8f16 VR128X:$src0), VK2WM:$mask),
8156            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8157  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8158                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8159            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8160}
8161
8162// Convert Signed/Unsigned Doubleword to Double
8163let Uses = []<Register>, mayRaiseFPException = 0 in
8164multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8165                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8166                           SDNode MaskOpNode128,
8167                           X86SchedWriteWidths sched> {
8168  // No rounding-control variant here; the i32 -> f64 conversion is exact.
8169  let Predicates = [HasAVX512] in
8170    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8171                            MaskOpNode, sched.ZMM>, EVEX_V512;
8172
8173  let Predicates = [HasVLX] in {
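    // The 128-bit form converts only the low 2 dword elements, so it reads a
    // 64-bit memory operand; the explicit load patterns below match that as a
    // scalar i64 load (scalar_to_vector + bitcast to v4i32).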
8174    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8175                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8176                               "", i64mem, VK2WM,
8177                               (v2f64 (OpNode128 (bc_v4i32
8178                                (v2i64
8179                                 (scalar_to_vector (loadi64 addr:$src)))))),
8180                               (v2f64 (MaskOpNode128 (bc_v4i32
8181                                (v2i64
8182                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8183                               EVEX_V128;
8184    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8185                               MaskOpNode, sched.YMM>, EVEX_V256;
8186  }
8187}
8188
8189// Convert Signed/Unsigned Doubleword to Float
8190multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8191                           SDNode MaskOpNode, SDNode OpNodeRnd,
8192                           X86SchedWriteWidths sched> {
8193  let Predicates = [HasAVX512] in
8194    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8195                            MaskOpNode, sched.ZMM>,
8196             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8197                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8198
8199  let Predicates = [HasVLX] in {
8200    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8201                               MaskOpNode, sched.XMM>, EVEX_V128;
8202    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8203                               MaskOpNode, sched.YMM>, EVEX_V256;
8204  }
8205}
8206
8207// Convert Float to Signed/Unsigned Doubleword with truncation
8208multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8209                            SDNode MaskOpNode,
8210                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8211  let Predicates = [HasAVX512] in {
8212    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8213                            MaskOpNode, sched.ZMM>,
8214             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8215                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8216  }
8217  let Predicates = [HasVLX] in {
8218    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8219                               MaskOpNode, sched.XMM>, EVEX_V128;
8220    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8221                               MaskOpNode, sched.YMM>, EVEX_V256;
8222  }
8223}
8224
8225// Convert Float to Signed/Unsigned Doubleword
8226multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8227                           SDNode MaskOpNode, SDNode OpNodeRnd,
8228                           X86SchedWriteWidths sched> {
8229  let Predicates = [HasAVX512] in {
8230    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8231                            MaskOpNode, sched.ZMM>,
8232             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8233                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8234  }
8235  let Predicates = [HasVLX] in {
8236    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8237                               MaskOpNode, sched.XMM>, EVEX_V128;
8238    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8239                               MaskOpNode, sched.YMM>, EVEX_V256;
8240  }
8241}
8242
8243// Convert Double to Signed/Unsigned Doubleword with truncation
8244multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8245                            SDNode MaskOpNode, SDNode OpNodeSAE,
8246                            X86SchedWriteWidths sched> {
8247  let Predicates = [HasAVX512] in {
8248    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8249                            MaskOpNode, sched.ZMM>,
8250             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8251                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8252  }
8253  let Predicates = [HasVLX] in {
8254    // We need "x"/"y" suffixes in order to distinguish between the 128- and
8255    // 256-bit memory forms of these instructions in the asm parser, since they
8256    // have the same destination type, 'v4i32x_info'. The broadcast string is
8257    // also specified explicitly for the same reason.
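    // For example, for the vcvttpd2dq instantiation (AT&T syntax, illustrative):
    //   vcvttpd2dqx (%rax), %xmm0    # memory source is 128 bits (v2f64)
    //   vcvttpd2dqy (%rax), %xmm0    # memory source is 256 bits (v4f64)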
8258    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8259                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8260                               VK2WM>, EVEX_V128;
8261    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8262                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8263  }
8264
8265  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8266                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8267                  VR128X:$src), 0, "att">;
8268  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8269                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8270                  VK2WM:$mask, VR128X:$src), 0, "att">;
8271  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8272                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8273                  VK2WM:$mask, VR128X:$src), 0, "att">;
8274  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8275                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8276                  f64mem:$src), 0, "att">;
8277  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8278                  "$dst {${mask}}, ${src}{1to2}}",
8279                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8280                  VK2WM:$mask, f64mem:$src), 0, "att">;
8281  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8282                  "$dst {${mask}} {z}, ${src}{1to2}}",
8283                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8284                  VK2WM:$mask, f64mem:$src), 0, "att">;
8285
8286  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8287                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8288                  VR256X:$src), 0, "att">;
8289  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8290                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8291                  VK4WM:$mask, VR256X:$src), 0, "att">;
8292  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8293                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8294                  VK4WM:$mask, VR256X:$src), 0, "att">;
8295  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8296                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8297                  f64mem:$src), 0, "att">;
8298  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8299                  "$dst {${mask}}, ${src}{1to4}}",
8300                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8301                  VK4WM:$mask, f64mem:$src), 0, "att">;
8302  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8303                  "$dst {${mask}} {z}, ${src}{1to4}}",
8304                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8305                  VK4WM:$mask, f64mem:$src), 0, "att">;
8306}
8307
8308// Convert Double to Signed/Unsigned Doubleword
8309multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8310                           SDNode MaskOpNode, SDNode OpNodeRnd,
8311                           X86SchedWriteWidths sched> {
8312  let Predicates = [HasAVX512] in {
8313    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8314                            MaskOpNode, sched.ZMM>,
8315             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8316                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8317  }
8318  let Predicates = [HasVLX] in {
8319    // We need "x"/"y" suffixes in order to distinguish between the 128- and
8320    // 256-bit memory forms of these instructions in the asm parser, since they
8321    // have the same destination type, 'v4i32x_info'. The broadcast string is
8322    // also specified explicitly for the same reason.
8323    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8324                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8325                               VK2WM>, EVEX_V128;
8326    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8327                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8328  }
8329
8330  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8331                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8332  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8333                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8334                  VK2WM:$mask, VR128X:$src), 0, "att">;
8335  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8336                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8337                  VK2WM:$mask, VR128X:$src), 0, "att">;
8338  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8339                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8340                  f64mem:$src), 0, "att">;
8341  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8342                  "$dst {${mask}}, ${src}{1to2}}",
8343                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8344                  VK2WM:$mask, f64mem:$src), 0, "att">;
8345  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8346                  "$dst {${mask}} {z}, ${src}{1to2}}",
8347                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8348                  VK2WM:$mask, f64mem:$src), 0, "att">;
8349
8350  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8351                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8352  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8353                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8354                  VK4WM:$mask, VR256X:$src), 0, "att">;
8355  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8356                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8357                  VK4WM:$mask, VR256X:$src), 0, "att">;
8358  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8359                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8360                  f64mem:$src), 0, "att">;
8361  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8362                  "$dst {${mask}}, ${src}{1to4}}",
8363                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8364                  VK4WM:$mask, f64mem:$src), 0, "att">;
8365  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8366                  "$dst {${mask}} {z}, ${src}{1to4}}",
8367                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8368                  VK4WM:$mask, f64mem:$src), 0, "att">;
8369}
8370
8371// Convert Double to Signed/Unsigned Quadword
8372multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8373                           SDNode MaskOpNode, SDNode OpNodeRnd,
8374                           X86SchedWriteWidths sched> {
8375  let Predicates = [HasDQI] in {
8376    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8377                            MaskOpNode, sched.ZMM>,
8378             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8379                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8380  }
8381  let Predicates = [HasDQI, HasVLX] in {
8382    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8383                               MaskOpNode, sched.XMM>, EVEX_V128;
8384    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8385                               MaskOpNode, sched.YMM>, EVEX_V256;
8386  }
8387}
8388
8389// Convert Double to Signed/Unsigned Quadword with truncation
8390multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8391                            SDNode MaskOpNode, SDNode OpNodeRnd,
8392                            X86SchedWriteWidths sched> {
8393  let Predicates = [HasDQI] in {
8394    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8395                            MaskOpNode, sched.ZMM>,
8396             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8397                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8398  }
8399  let Predicates = [HasDQI, HasVLX] in {
8400    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8401                               MaskOpNode, sched.XMM>, EVEX_V128;
8402    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8403                               MaskOpNode, sched.YMM>, EVEX_V256;
8404  }
8405}
8406
8407// Convert Signed/Unsigned Quadword to Double
8408multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8409                           SDNode MaskOpNode, SDNode OpNodeRnd,
8410                           X86SchedWriteWidths sched> {
8411  let Predicates = [HasDQI] in {
8412    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8413                            MaskOpNode, sched.ZMM>,
8414             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8415                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8416  }
8417  let Predicates = [HasDQI, HasVLX] in {
8418    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8419                               MaskOpNode, sched.XMM>, EVEX_V128;
8420    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8421                               MaskOpNode, sched.YMM>, EVEX_V256;
8422  }
8423}
8424
8425// Convert Float to Signed/Unsigned Quadword
8426multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8427                           SDNode MaskOpNode, SDNode OpNodeRnd,
8428                           X86SchedWriteWidths sched> {
8429  let Predicates = [HasDQI] in {
8430    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8431                            MaskOpNode, sched.ZMM>,
8432             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8433                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8434  }
8435  let Predicates = [HasDQI, HasVLX] in {
8436    // The broadcast string is specified explicitly, since only 2 elements are
8437    // taken from the v4f32x_info source.
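    // For example, for the vcvtps2qq instantiation the broadcast form splats a
    // single f32 to both converted elements (illustrative only):
    //   vcvtps2qq (%rax){1to2}, %xmm0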
8438    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8439                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8440                               (v2i64 (OpNode (bc_v4f32
8441                                (v2f64
8442                                 (scalar_to_vector (loadf64 addr:$src)))))),
8443                               (v2i64 (MaskOpNode (bc_v4f32
8444                                (v2f64
8445                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8446                               EVEX_V128;
8447    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8448                               MaskOpNode, sched.YMM>, EVEX_V256;
8449  }
8450}
8451
8452// Convert Float to Signed/Unsigned Quadword with truncation
8453multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8454                            SDNode MaskOpNode, SDNode OpNodeRnd,
8455                            X86SchedWriteWidths sched> {
8456  let Predicates = [HasDQI] in {
8457    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8458                            MaskOpNode, sched.ZMM>,
8459             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8460                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8461  }
8462  let Predicates = [HasDQI, HasVLX] in {
8463    // The broadcast string is specified explicitly, since only 2 elements are
8464    // taken from the v4f32x_info source.
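    // e.g. for vcvttps2qq (illustrative): vcvttps2qq (%rax){1to2}, %xmm0
    // broadcasts one f32 to both converted elements.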
8465    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8466                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8467                               (v2i64 (OpNode (bc_v4f32
8468                                (v2f64
8469                                 (scalar_to_vector (loadf64 addr:$src)))))),
8470                               (v2i64 (MaskOpNode (bc_v4f32
8471                                (v2f64
8472                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8473                               EVEX_V128;
8474    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8475                               MaskOpNode, sched.YMM>, EVEX_V256;
8476  }
8477}
8478
8479// Convert Signed/Unsigned Quadword to Float
8480// Also Convert Signed/Unsigned Doubleword to Half
8481multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8482                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8483                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8484                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8485                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8486  let Predicates = [prd] in {
8487    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8488                            MaskOpNode, sched.ZMM>,
8489             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8490                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8491  }
8492  let Predicates = [prd, HasVLX] in {
8493    // We need "x"/"y" suffixes in order to distinguish between the 128- and
8494    // 256-bit memory forms of these instructions in the asm parser, since they
8495    // have the same destination type ('_dst.info128'). The broadcast string is
8496    // also specified explicitly for the same reason.
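    // For example, for the vcvtqq2ps instantiation (AT&T syntax, illustrative):
    //   vcvtqq2psx (%rax), %xmm0    # memory source is 128 bits (v2i64)
    //   vcvtqq2psy (%rax), %xmm0    # memory source is 256 bits (v4i64)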
8497    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8498                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8499                               "{x}", i128mem, _src.info128.KRCWM>,
8500                               EVEX_V128;
8501    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8502                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8503                               "{y}">, EVEX_V256;
8504
8505    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8506    // patterns have been disabled with null_frag.
8507    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8508              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8509    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8510                             _src.info128.KRCWM:$mask),
8511              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8512    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8513                             _src.info128.KRCWM:$mask),
8514              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8515
8516    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8517              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8518    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8519                             _src.info128.KRCWM:$mask),
8520              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8521    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8522                             _src.info128.KRCWM:$mask),
8523              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8524
8525    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8526              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8527    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8528                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8529              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8530    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8531                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8532              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8533  }
8534
8535  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8536                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8537                  VR128X:$src), 0, "att">;
8538  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8539                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8540                  VK2WM:$mask, VR128X:$src), 0, "att">;
8541  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8542                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8543                  VK2WM:$mask, VR128X:$src), 0, "att">;
8544  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8545                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8546                  i64mem:$src), 0, "att">;
8547  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8548                  "$dst {${mask}}, ${src}{1to2}}",
8549                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8550                  VK2WM:$mask, i64mem:$src), 0, "att">;
8551  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8552                  "$dst {${mask}} {z}, ${src}{1to2}}",
8553                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8554                  VK2WM:$mask, i64mem:$src), 0, "att">;
8555
8556  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8557                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8558                  VR256X:$src), 0, "att">;
8559  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8560                  "$dst {${mask}}, $src}",
8561                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8562                  VK4WM:$mask, VR256X:$src), 0, "att">;
8563  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8564                  "$dst {${mask}} {z}, $src}",
8565                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8566                  VK4WM:$mask, VR256X:$src), 0, "att">;
8567  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8568                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8569                  i64mem:$src), 0, "att">;
8570  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8571                  "$dst {${mask}}, ${src}{1to4}}",
8572                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8573                  VK4WM:$mask, i64mem:$src), 0, "att">;
8574  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8575                  "$dst {${mask}} {z}, ${src}{1to4}}",
8576                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8577                  VK4WM:$mask, i64mem:$src), 0, "att">;
8578}
8579
8580defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8581                                 X86any_VSintToFP, X86VSintToFP,
8582                                 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8583
8584defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8585                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8586                                TB, EVEX_CD8<32, CD8VF>;
8587
8588defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8589                                 X86cvttp2si, X86cvttp2siSAE,
8590                                 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>;
8591
8592defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8593                                 X86cvttp2si, X86cvttp2siSAE,
8594                                 SchedWriteCvtPD2DQ>,
8595                                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;
8596
8597defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8598                                 X86cvttp2ui, X86cvttp2uiSAE,
8599                                 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>;
8600
8601defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8602                                 X86cvttp2ui, X86cvttp2uiSAE,
8603                                 SchedWriteCvtPD2DQ>,
8604                                 TB, REX_W, EVEX_CD8<64, CD8VF>;
8605
8606defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8607                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8608                                  SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>;
8609
8610defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8611                                 uint_to_fp, X86VUintToFpRnd,
8612                                 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, CD8VF>;
8613
8614defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8615                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8616                                 EVEX_CD8<32, CD8VF>;
8617
8618defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8619                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD,
8620                                 REX_W, EVEX_CD8<64, CD8VF>;
8621
8622defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8623                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8624                                 TB, EVEX_CD8<32, CD8VF>;
8625
8626defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8627                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8628                                 TB, EVEX_CD8<64, CD8VF>;
8629
8630defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8631                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8632                                 TB, PD, EVEX_CD8<64, CD8VF>;
8633
8634defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8635                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8636                                 EVEX_CD8<32, CD8VH>;
8637
8638defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8639                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8640                                 TB, PD, EVEX_CD8<64, CD8VF>;
8641
8642defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8643                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD,
8644                                 EVEX_CD8<32, CD8VH>;
8645
8646defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8647                                 X86cvttp2si, X86cvttp2siSAE,
8648                                 SchedWriteCvtPD2DQ>, REX_W,
8649                                 TB, PD, EVEX_CD8<64, CD8VF>;
8650
8651defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8652                                 X86cvttp2si, X86cvttp2siSAE,
8653                                 SchedWriteCvtPS2DQ>, TB, PD,
8654                                 EVEX_CD8<32, CD8VH>;
8655
8656defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8657                                 X86cvttp2ui, X86cvttp2uiSAE,
8658                                 SchedWriteCvtPD2DQ>, REX_W,
8659                                 TB, PD, EVEX_CD8<64, CD8VF>;
8660
8661defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8662                                 X86cvttp2ui, X86cvttp2uiSAE,
8663                                 SchedWriteCvtPS2DQ>, TB, PD,
8664                                 EVEX_CD8<32, CD8VH>;
8665
8666defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8667                            sint_to_fp, X86VSintToFpRnd,
8668                            SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8669
8670defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8671                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8672                            REX_W, TB, XS, EVEX_CD8<64, CD8VF>;
8673
8674defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8675                            X86any_VSintToFP, X86VMSintToFP,
8676                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8677                            SchedWriteCvtDQ2PS, HasFP16>,
8678                            T_MAP5, EVEX_CD8<32, CD8VF>;
8679
8680defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8681                            X86any_VUintToFP, X86VMUintToFP,
8682                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8683                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD,
8684                            EVEX_CD8<32, CD8VF>;
8685
8686defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8687                            X86any_VSintToFP, X86VMSintToFP,
8688                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8689                            SchedWriteCvtDQ2PS>, REX_W, TB,
8690                            EVEX_CD8<64, CD8VF>;
8691
8692defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8693                            X86any_VUintToFP, X86VMUintToFP,
8694                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8695                            SchedWriteCvtDQ2PS>, REX_W, TB, XD,
8696                            EVEX_CD8<64, CD8VF>;
8697
8698let Predicates = [HasVLX] in {
8699  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8700  // patterns have been disabled with null_frag.
8701  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8702            (VCVTPD2DQZ128rr VR128X:$src)>;
8703  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8704                          VK2WM:$mask),
8705            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8706  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8707                          VK2WM:$mask),
8708            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8709
8710  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8711            (VCVTPD2DQZ128rm addr:$src)>;
8712  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8713                          VK2WM:$mask),
8714            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8715  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8716                          VK2WM:$mask),
8717            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8718
8719  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8720            (VCVTPD2DQZ128rmb addr:$src)>;
8721  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8722                          (v4i32 VR128X:$src0), VK2WM:$mask),
8723            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8724  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8725                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8726            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8727
8728  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8729  // patterns have been disabled with null_frag.
8730  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8731            (VCVTTPD2DQZ128rr VR128X:$src)>;
8732  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8733                          VK2WM:$mask),
8734            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8735  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8736                          VK2WM:$mask),
8737            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8738
8739  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8740            (VCVTTPD2DQZ128rm addr:$src)>;
8741  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8742                          VK2WM:$mask),
8743            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8744  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8745                          VK2WM:$mask),
8746            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8747
8748  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8749            (VCVTTPD2DQZ128rmb addr:$src)>;
8750  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8751                          (v4i32 VR128X:$src0), VK2WM:$mask),
8752            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8753  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8754                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8755            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8756
8757  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8758  // patterns have been disabled with null_frag.
8759  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8760            (VCVTPD2UDQZ128rr VR128X:$src)>;
8761  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8762                           VK2WM:$mask),
8763            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8764  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8765                           VK2WM:$mask),
8766            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8767
8768  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8769            (VCVTPD2UDQZ128rm addr:$src)>;
8770  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8771                           VK2WM:$mask),
8772            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8773  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8774                           VK2WM:$mask),
8775            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8776
8777  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8778            (VCVTPD2UDQZ128rmb addr:$src)>;
8779  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8780                           (v4i32 VR128X:$src0), VK2WM:$mask),
8781            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8782  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8783                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8784            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8785
8786  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8787  // patterns have been disabled with null_frag.
8788  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8789            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8790  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8791                          VK2WM:$mask),
8792            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8793  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8794                          VK2WM:$mask),
8795            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8796
8797  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8798            (VCVTTPD2UDQZ128rm addr:$src)>;
8799  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8800                          VK2WM:$mask),
8801            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8802  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8803                          VK2WM:$mask),
8804            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8805
8806  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8807            (VCVTTPD2UDQZ128rmb addr:$src)>;
8808  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8809                          (v4i32 VR128X:$src0), VK2WM:$mask),
8810            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8811  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8812                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8813            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8814}
8815
8816let Predicates = [HasDQI, HasVLX] in {
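  // Fold a zero-extended 64-bit vector load (only the low 2 f32 elements are
  // converted) into the 128-bit memory forms, which read a 64-bit operand.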
8817  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8818            (VCVTPS2QQZ128rm addr:$src)>;
8819  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8820                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8821                                 VR128X:$src0)),
8822            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8823  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8824                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8825                                 v2i64x_info.ImmAllZerosV)),
8826            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8827
8828  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8829            (VCVTPS2UQQZ128rm addr:$src)>;
8830  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8831                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8832                                 VR128X:$src0)),
8833            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8834  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8835                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8836                                 v2i64x_info.ImmAllZerosV)),
8837            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8838
8839  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8840            (VCVTTPS2QQZ128rm addr:$src)>;
8841  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8842                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8843                                 VR128X:$src0)),
8844            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8845  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8846                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8847                                 v2i64x_info.ImmAllZerosV)),
8848            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8849
8850  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8851            (VCVTTPS2UQQZ128rm addr:$src)>;
8852  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8853                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8854                                 VR128X:$src0)),
8855            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8856  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8857                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8858                                 v2i64x_info.ImmAllZerosV)),
8859            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8860}
8861
8862let Predicates = [HasVLX] in {
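  // Likewise, fold a zero-extended 64-bit load (the low 2 i32 elements) into
  // the 128-bit memory forms of VCVTDQ2PD/VCVTUDQ2PD.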
8863  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8864            (VCVTDQ2PDZ128rm addr:$src)>;
8865  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8866                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8867                                 VR128X:$src0)),
8868            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8869  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8870                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8871                                 v2f64x_info.ImmAllZerosV)),
8872            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8873
8874  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8875            (VCVTUDQ2PDZ128rm addr:$src)>;
8876  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8877                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8878                                 VR128X:$src0)),
8879            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8880  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8881                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8882                                 v2f64x_info.ImmAllZerosV)),
8883            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8884}
8885
8886//===----------------------------------------------------------------------===//
8887// Half precision conversion instructions
8888//===----------------------------------------------------------------------===//
8889
8890let Uses = [MXCSR], mayRaiseFPException = 1 in
8891multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8892                           X86MemOperand x86memop, dag ld_dag,
8893                           X86FoldableSchedWrite sched> {
8894  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8895                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8896                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8897                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
8898                            T8, PD, Sched<[sched]>;
8899  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8900                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8901                            (X86any_cvtph2ps (_src.VT ld_dag)),
8902                            (X86cvtph2ps (_src.VT ld_dag))>,
8903                            T8, PD, Sched<[sched.Folded]>;
8904}
8905
8906multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8907                               X86FoldableSchedWrite sched> {
8908  let Uses = [MXCSR] in
8909  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8910                             (ins _src.RC:$src), "vcvtph2ps",
8911                             "{sae}, $src", "$src, {sae}",
8912                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8913                             T8, PD, EVEX_B, Sched<[sched]>;
8914}
8915
8916let Predicates = [HasAVX512] in
8917  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8918                                    (load addr:$src), WriteCvtPH2PSZ>,
8919                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8920                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8921
8922let Predicates = [HasVLX] in {
8923  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8924                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8925                       EVEX_CD8<32, CD8VH>;
8926  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8927                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
8928                       WriteCvtPH2PS>, EVEX, EVEX_V128,
8929                       EVEX_CD8<32, CD8VH>;
8930
8931  // Pattern match vcvtph2ps of a scalar i64 load.
8932  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8933              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8934            (VCVTPH2PSZ128rm addr:$src)>;
8935}
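// Note: the VCVTPH2PSZ128 memory form only reads 64 bits of half-precision
// source, so its folded-load patterns above use X86vzload64 / a scalar i64
// load rather than a full 128-bit load.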
8936
8937multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8938                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8939let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8940  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8941             (ins _src.RC:$src1, i32u8imm:$src2),
8942             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8943             [(set _dest.RC:$dst,
8944                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8945             Sched<[RR]>;
8946  let Constraints = "$src0 = $dst" in
8947  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8948             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8949             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8950             [(set _dest.RC:$dst,
8951                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8952                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8953             Sched<[RR]>, EVEX_K;
8954  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8955             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8956             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8957             [(set _dest.RC:$dst,
8958                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8959                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8960             Sched<[RR]>, EVEX_KZ;
8961  let hasSideEffects = 0, mayStore = 1 in {
8962    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8963               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8964               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8965               Sched<[MR]>;
8966    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8967               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8968               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8969                EVEX_K, Sched<[MR]>;
8970  }
8971}
8972}
8973
8974multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8975                               SchedWrite Sched> {
8976  let hasSideEffects = 0, Uses = [MXCSR] in {
8977    def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8978              (ins _src.RC:$src1, i32u8imm:$src2),
8979              "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
8980              [(set _dest.RC:$dst,
8981                    (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8982              EVEX_B, Sched<[Sched]>;
8983    let Constraints = "$src0 = $dst" in
8984    def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8985              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8986              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
8987              [(set _dest.RC:$dst,
8988                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8989                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8990              EVEX_B, Sched<[Sched]>, EVEX_K;
8991    def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8992              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8993              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
8994              [(set _dest.RC:$dst,
8995                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
8996                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8997              EVEX_B, Sched<[Sched]>, EVEX_KZ;
8998}
8999}
9000
9001let Predicates = [HasAVX512] in {
9002  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9003                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9004                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9005                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9006
9007  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9008            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9009}
9010
9011let Predicates = [HasVLX] in {
9012  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9013                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9014                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9015  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9016                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9017                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9018
9019  def : Pat<(store (f64 (extractelt
9020                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9021                         (iPTR 0))), addr:$dst),
9022            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9023  def : Pat<(store (i64 (extractelt
9024                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9025                         (iPTR 0))), addr:$dst),
9026            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9027  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9028            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9029}
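// A 128-bit vcvtps2ph result occupies only the low 64 bits of the destination,
// so the store patterns above match an extract of the low f64/i64 element
// rather than a full vector store.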
9030
9031//  Unordered/ordered scalar FP compare with SAE, setting EFLAGS
9032multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9033                              string OpcodeStr, Domain d,
9034                              X86FoldableSchedWrite sched = WriteFComX> {
9035  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9036  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9037                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9038                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9039}
9040
9041let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9042  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9043                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9044  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9045                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9046  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9047                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9048  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9049                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9050}
9051
9052let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9053  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9054                                 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9055                                 EVEX_CD8<32, CD8VT1>;
9056  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9057                                  "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9058                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9059  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9060                                 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9061                                 EVEX_CD8<32, CD8VT1>;
9062  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9063                                 "comisd", SSEPackedDouble>, TB, PD, EVEX,
9064                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9065  let isCodeGenOnly = 1 in {
9066    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9067                          sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9068                          EVEX_CD8<32, CD8VT1>;
9069    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9070                          sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
9071                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9072
9073    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9074                          sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
9075                          EVEX_CD8<32, CD8VT1>;
9076    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9077                          sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
9078                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9079  }
9080}
9081
9082let Defs = [EFLAGS], Predicates = [HasFP16] in {
9083  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9084                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9085                                EVEX_CD8<16, CD8VT1>;
9086  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9087                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
9088                                EVEX_CD8<16, CD8VT1>;
9089  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9090                                "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
9091                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9092  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9093                                "comish", SSEPackedSingle>, T_MAP5, EVEX,
9094                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9095  let isCodeGenOnly = 1 in {
9096    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9097                                sse_load_f16, "ucomish", SSEPackedSingle>,
9098                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9099
9100    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9101                                sse_load_f16, "comish", SSEPackedSingle>,
9102                                T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9103  }
9104}
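// Illustrative use of the {sae} compare forms above (Intel syntax, sketch
// only): vucomiss xmm1, xmm2, {sae}  -- compare with FP exceptions suppressed.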
9105
9106/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
9107multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9108                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9109                         Predicate prd = HasAVX512> {
9110  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9111  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9112                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9113                           "$src2, $src1", "$src1, $src2",
9114                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9115                           EVEX, VVVV, VEX_LIG, Sched<[sched]>;
9116  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9117                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9118                         "$src2, $src1", "$src1, $src2",
9119                         (OpNode (_.VT _.RC:$src1),
9120                          (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
9121                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9122}
9123}
9124
9125defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9126                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9127                               T_MAP6, PD;
9128defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9129                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9130                                 EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
9131let Uses = [MXCSR] in {
9132defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9133                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9134                               T8, PD;
9135defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9136                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9137                               T8, PD;
9138defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9139                                 SchedWriteFRsqrt.Scl, f32x_info>,
9140                                 EVEX_CD8<32, CD8VT1>, T8, PD;
9141defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9142                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9143                                 EVEX_CD8<64, CD8VT1>, T8, PD;
9144}
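// vrcp14*/vrsqrt14* compute an approximate reciprocal / reciprocal square
// root (documented relative error is about 2^-14). Example masked scalar form
// (Intel syntax, sketch only): vrcp14ss xmm1 {k1} {z}, xmm2, xmm3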
9145
9146/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
9147multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9148                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9149  let ExeDomain = _.ExeDomain in {
9150  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9151                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9152                         (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
9153                         Sched<[sched]>;
9154  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9155                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9156                         (OpNode (_.VT
9157                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
9158                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9159  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9160                          (ins _.ScalarMemOp:$src), OpcodeStr,
9161                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9162                          (OpNode (_.VT
9163                            (_.BroadcastLdFrag addr:$src)))>,
9164                          EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9165  }
9166}
9167
9168multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9169                                X86SchedWriteWidths sched> {
9170  let Uses = [MXCSR] in {
9171  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9172                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9173  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9174                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9175  }
9176  let Predicates = [HasFP16] in
9177  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9178                           v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;
9179
9180  // Define only if AVX512VL feature is present.
9181  let Predicates = [HasVLX], Uses = [MXCSR] in {
9182    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9183                                  OpNode, sched.XMM, v4f32x_info>,
9184                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9185    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9186                                  OpNode, sched.YMM, v8f32x_info>,
9187                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9188    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9189                                  OpNode, sched.XMM, v2f64x_info>,
9190                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9191    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9192                                  OpNode, sched.YMM, v4f64x_info>,
9193                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9194  }
9195  let Predicates = [HasFP16, HasVLX] in {
9196    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9197                                OpNode, sched.XMM, v8f16x_info>,
9198                                EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
9199    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9200                                OpNode, sched.YMM, v16f16x_info>,
9201                                EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
9202  }
9203}
9204
9205defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9206defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
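// The packed forms above also take an embedded broadcast operand, e.g.
// (Intel syntax, sketch only): vrsqrt14ps zmm1 {k1}, dword ptr [rax]{1to16}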
9207
9208/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9209multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9210                         SDNode OpNode, SDNode OpNodeSAE,
9211                         X86FoldableSchedWrite sched> {
9212  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9213  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9214                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9215                           "$src2, $src1", "$src1, $src2",
9216                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9217                           Sched<[sched]>, SIMD_EXC;
9218
9219  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9220                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9221                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9222                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9223                            EVEX_B, Sched<[sched]>;
9224
9225  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9226                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9227                         "$src2, $src1", "$src1, $src2",
9228                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9229                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9230  }
9231}
9232
9233multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9234                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9235  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9236                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
9237  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9238                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
9239}
9240
9241multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9242                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9243  let Predicates = [HasFP16] in
9244  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9245               EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
9246}
9247
9248let Predicates = [HasERI] in {
9249  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9250                               SchedWriteFRcp.Scl>;
9251  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9252                               SchedWriteFRsqrt.Scl>;
9253}
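// vrcp28/vrsqrt28 belong to AVX512ER (Xeon Phi class hardware) and provide
// roughly 2^-28 maximum relative error, hence the HasERI predicate.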
9254
9255defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9256                              SchedWriteFRnd.Scl>,
9257                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9258                                  SchedWriteFRnd.Scl>;
9259
9260/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
9261multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9262                         SDNode OpNode, X86FoldableSchedWrite sched> {
9263  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9264  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9265                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9266                         (OpNode (_.VT _.RC:$src))>,
9267                         Sched<[sched]>;
9268
9269  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9270                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9271                         (OpNode (_.VT
9272                             (bitconvert (_.LdFrag addr:$src))))>,
9273                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9274
9275  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9276                         (ins _.ScalarMemOp:$src), OpcodeStr,
9277                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9278                         (OpNode (_.VT
9279                                  (_.BroadcastLdFrag addr:$src)))>,
9280                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9281  }
9282}
9283multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9284                         SDNode OpNode, X86FoldableSchedWrite sched> {
9285  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9286  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9287                        (ins _.RC:$src), OpcodeStr,
9288                        "{sae}, $src", "$src, {sae}",
9289                        (OpNode (_.VT _.RC:$src))>,
9290                        EVEX_B, Sched<[sched]>;
9291}
9292
9293multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9294                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9295   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9296              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9297              T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9298   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9299              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9300              T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9301}
9302
9303multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9304                                  SDNode OpNode, X86SchedWriteWidths sched> {
9305  // Define only if AVX512VL feature is present.
9306  let Predicates = [HasVLX] in {
9307    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9308                                sched.XMM>,
9309                                EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
9310    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9311                                sched.YMM>,
9312                                EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
9313    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9314                                sched.XMM>,
9315                                EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9316    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9317                                sched.YMM>,
9318                                EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
9319  }
9320}
9321
9322multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9323                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9324  let Predicates = [HasFP16] in
9325  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9326              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9327              T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9328  let Predicates = [HasFP16, HasVLX] in {
9329    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9330                                     EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9331    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9332                                     EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
9333  }
9334}
9335let Predicates = [HasERI] in {
9336 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9337                            SchedWriteFRsqrt>, EVEX;
9338 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9339                            SchedWriteFRcp>, EVEX;
9340 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9341                            SchedWriteFAdd>, EVEX;
9342}
9343defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9344                            SchedWriteFRnd>,
9345                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9346                                     SchedWriteFRnd>,
9347                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9348                                          SchedWriteFRnd>, EVEX;
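// vgetexp extracts the exponent of each source element as a floating-point
// value (roughly floor(log2(|x|)) for normal inputs), in both the packed
// forms here and the scalar forms defined earlier.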
9349
9350multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9351                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9352  let ExeDomain = _.ExeDomain in
9353  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9354                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9355                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9356                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9357}
9358
9359multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9360                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9361  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9362  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9363                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9364                         (_.VT (any_fsqrt _.RC:$src)),
9365                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9366                         Sched<[sched]>;
9367  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9368                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9369                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9370                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9371                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9372  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9373                          (ins _.ScalarMemOp:$src), OpcodeStr,
9374                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9375                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9376                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9377                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9378  }
9379}
9380
9381let Uses = [MXCSR], mayRaiseFPException = 1 in
9382multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9383                                  X86SchedWriteSizes sched> {
9384  let Predicates = [HasFP16] in
9385  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9386                                sched.PH.ZMM, v32f16_info>,
9387                                EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9388  let Predicates = [HasFP16, HasVLX] in {
9389    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9390                                     sched.PH.XMM, v8f16x_info>,
9391                                     EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
9392    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9393                                     sched.PH.YMM, v16f16x_info>,
9394                                     EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
9395  }
9396  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9397                                sched.PS.ZMM, v16f32_info>,
9398                                EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9399  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9400                                sched.PD.ZMM, v8f64_info>,
9401                                EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9402  // Define only if AVX512VL feature is present.
9403  let Predicates = [HasVLX] in {
9404    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9405                                     sched.PS.XMM, v4f32x_info>,
9406                                     EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
9407    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9408                                     sched.PS.YMM, v8f32x_info>,
9409                                     EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
9410    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9411                                     sched.PD.XMM, v2f64x_info>,
9412                                     EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9413    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9414                                     sched.PD.YMM, v4f64x_info>,
9415                                     EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9416  }
9417}
9418
9419let Uses = [MXCSR] in
9420multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9421                                        X86SchedWriteSizes sched> {
9422  let Predicates = [HasFP16] in
9423  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9424                                      sched.PH.ZMM, v32f16_info>,
9425                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
9426  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9427                                      sched.PS.ZMM, v16f32_info>,
9428                                      EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
9429  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9430                                      sched.PD.ZMM, v8f64_info>,
9431                                      EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
9432}
9433
9434multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9435                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9436  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9437    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9438                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9439                         "$src2, $src1", "$src1, $src2",
9440                         (X86fsqrts (_.VT _.RC:$src1),
9441                                    (_.VT _.RC:$src2))>,
9442                         Sched<[sched]>, SIMD_EXC;
9443    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9444                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9445                         "$src2, $src1", "$src1, $src2",
9446                         (X86fsqrts (_.VT _.RC:$src1),
9447                                    (_.ScalarIntMemFrags addr:$src2))>,
9448                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9449    let Uses = [MXCSR] in
9450    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9451                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9452                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9453                         (X86fsqrtRnds (_.VT _.RC:$src1),
9454                                     (_.VT _.RC:$src2),
9455                                     (i32 timm:$rc))>,
9456                         EVEX_B, EVEX_RC, Sched<[sched]>;
9457
9458    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9459      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9460                (ins _.FRC:$src1, _.FRC:$src2),
9461                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9462                Sched<[sched]>, SIMD_EXC;
9463      let mayLoad = 1 in
9464        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9465                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9466                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9467                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9468    }
9469  }
9470
9471  let Predicates = [prd] in {
9472    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9473              (!cast<Instruction>(Name#Zr)
9474                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9475  }
9476
9477  let Predicates = [prd, OptForSize] in {
9478    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9479              (!cast<Instruction>(Name#Zm)
9480                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9481  }
9482}
9483
9484multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9485                                  X86SchedWriteSizes sched> {
9486  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9487                        EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
9488  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9489                        EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
9490  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9491                        EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
9492}
9493
9494defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9495             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9496
9497defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
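// Example scalar square-root forms (Intel syntax, sketch only):
//   vsqrtss xmm1 {k1}, xmm2, xmm3
//   vsqrtsd xmm1, xmm2, xmm3, {rn-sae}   ; static rounding via EVEX.b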
9498
9499multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9500                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9501  let ExeDomain = _.ExeDomain in {
9502  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9503                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9504                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9505                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9506                           (i32 timm:$src3)))>,
9507                           Sched<[sched]>, SIMD_EXC;
9508
9509  let Uses = [MXCSR] in
9510  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9511                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9512                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9513                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9514                         (i32 timm:$src3)))>, EVEX_B,
9515                         Sched<[sched]>;
9516
9517  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9518                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9519                         OpcodeStr,
9520                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9521                         (_.VT (X86RndScales _.RC:$src1,
9522                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9523                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9524
9525  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9526    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9527               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9528               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9529               []>, Sched<[sched]>, SIMD_EXC;
9530
9531    let mayLoad = 1 in
9532      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9533                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9534                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9535                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9536  }
9537  }
9538
9539  let Predicates = [HasAVX512] in {
9540    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9541              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9542               _.FRC:$src1, timm:$src2))>;
9543  }
9544
9545  let Predicates = [HasAVX512, OptForSize] in {
9546    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9547              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9548               addr:$src1, timm:$src2))>;
9549  }
9550}
9551
9552let Predicates = [HasFP16] in
9553defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9554                                           SchedWriteFRnd.Scl, f16x_info>,
9555                                           AVX512PSIi8Base, TA, EVEX, VVVV,
9556                                           EVEX_CD8<16, CD8VT1>;
9557
9558defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9559                                           SchedWriteFRnd.Scl, f32x_info>,
9560                                           AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9561                                           EVEX_CD8<32, CD8VT1>;
9562
9563defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9564                                           SchedWriteFRnd.Scl, f64x_info>,
9565                                           REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
9566                                           EVEX_CD8<64, CD8VT1>;
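// For vrndscale*, the imm8 encodes the rounding behaviour: roughly, the low
// nibble selects the rounding mode / exception control and the high nibble
// the number of fraction bits to keep (round to a multiple of 2^-M).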
9567
9568multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9569                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9570                                dag OutMask, Predicate BasePredicate> {
9571  let Predicates = [BasePredicate] in {
9572    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9573               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9574               (extractelt _.VT:$dst, (iPTR 0))))),
9575              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9576               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9577
9578    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9579               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9580               ZeroFP))),
9581              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9582               OutMask, _.VT:$src2, _.VT:$src1)>;
9583  }
9584}
9585
9586defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9587                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9588                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9589defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9590                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9591                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9592defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9593                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9594                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
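// These patterns fold a masked scalar select around fsqrt into the masked
// r_Int forms of VSQRTSHZ/VSQRTSSZ/VSQRTSDZ defined above.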
9595
9596
9597//===----------------------------------------------------------------------===//
9598// Integer truncate and extend operations
9599//===----------------------------------------------------------------------===//
9600
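// The vpmov* truncations below come in three flavours: plain truncation
// (vpmov*), truncation with signed saturation (vpmovs*), and with unsigned
// saturation (vpmovus*). Each has register and memory destinations with merge
// and zero masking. Illustrative Intel-syntax form (sketch only):
//   vpmovqb xmm1 {k1}, zmm2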
9601multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9602                              SDPatternOperator MaskNode,
9603                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9604                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9605  let ExeDomain = DestInfo.ExeDomain in {
9606  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9607             (ins SrcInfo.RC:$src),
9608             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9609             [(set DestInfo.RC:$dst,
9610                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9611             EVEX, Sched<[sched]>;
9612  let Constraints = "$src0 = $dst" in
9613  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9614             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9615             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9616             [(set DestInfo.RC:$dst,
9617                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9618                             (DestInfo.VT DestInfo.RC:$src0),
9619                             SrcInfo.KRCWM:$mask))]>,
9620             EVEX, EVEX_K, Sched<[sched]>;
9621  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9622             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9623             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9624             [(set DestInfo.RC:$dst,
9625                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9626                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9627             EVEX, EVEX_KZ, Sched<[sched]>;
9628  }
9629
9630  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9631    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9632               (ins x86memop:$dst, SrcInfo.RC:$src),
9633               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9634               EVEX, Sched<[sched.Folded]>;
9635
9636    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9637               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9638               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9639               EVEX, EVEX_K, Sched<[sched.Folded]>;
9640  } // mayStore = 1, hasSideEffects = 0
9641}
9642
9643multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9644                                    PatFrag truncFrag, PatFrag mtruncFrag,
9645                                    string Name> {
9646
9647  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9648            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9649                                    addr:$dst, SrcInfo.RC:$src)>;
9650
9651  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9652                        SrcInfo.KRCWM:$mask),
9653            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9654                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9655}
9656
9657multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9658                        SDNode OpNode256, SDNode OpNode512,
9659                        SDPatternOperator MaskNode128,
9660                        SDPatternOperator MaskNode256,
9661                        SDPatternOperator MaskNode512,
9662                        X86SchedWriteWidths sched,
9663                        AVX512VLVectorVTInfo VTSrcInfo,
9664                        X86VectorVTInfo DestInfoZ128,
9665                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9666                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9667                        X86MemOperand x86memopZ, PatFrag truncFrag,
9668                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9669
9670  let Predicates = [HasVLX, prd] in {
9671    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9672                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9673                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9674                                         mtruncFrag, NAME>, EVEX_V128;
9675
9676    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9677                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9678                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9679                                         mtruncFrag, NAME>, EVEX_V256;
9680  }
9681  let Predicates = [prd] in
9682    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9683                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9684                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9685                                         mtruncFrag, NAME>, EVEX_V512;
9686}
9687
9688multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9689                           X86SchedWriteWidths sched, PatFrag StoreNode,
9690                           PatFrag MaskedStoreNode, SDNode InVecNode,
9691                           SDPatternOperator InVecMaskNode> {
9692  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9693                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9694                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9695                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9696                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9697}
9698
9699multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9700                           SDPatternOperator MaskNode,
9701                           X86SchedWriteWidths sched, PatFrag StoreNode,
9702                           PatFrag MaskedStoreNode, SDNode InVecNode,
9703                           SDPatternOperator InVecMaskNode> {
9704  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9705                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9706                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9707                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9708                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9709}
9710
9711multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9712                           SDPatternOperator MaskNode,
9713                           X86SchedWriteWidths sched, PatFrag StoreNode,
9714                           PatFrag MaskedStoreNode, SDNode InVecNode,
9715                           SDPatternOperator InVecMaskNode> {
9716  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9717                          InVecMaskNode, MaskNode, MaskNode, sched,
9718                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9719                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9720                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9721}
9722
9723multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9724                           SDPatternOperator MaskNode,
9725                           X86SchedWriteWidths sched, PatFrag StoreNode,
9726                           PatFrag MaskedStoreNode, SDNode InVecNode,
9727                           SDPatternOperator InVecMaskNode> {
9728  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9729                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9730                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9731                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9732                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9733}
9734
9735multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9736                           SDPatternOperator MaskNode,
9737                           X86SchedWriteWidths sched, PatFrag StoreNode,
9738                           PatFrag MaskedStoreNode, SDNode InVecNode,
9739                           SDPatternOperator InVecMaskNode> {
9740  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9741                          InVecMaskNode, MaskNode, MaskNode, sched,
9742                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9743                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9744                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9745}
9746
9747multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9748                           SDPatternOperator MaskNode,
9749                           X86SchedWriteWidths sched, PatFrag StoreNode,
9750                           PatFrag MaskedStoreNode, SDNode InVecNode,
9751                           SDPatternOperator InVecMaskNode> {
9752  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9753                          InVecMaskNode, MaskNode, MaskNode, sched,
9754                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9755                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9756                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9757}
9758
9759defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9760                                  SchedWriteVecTruncate, truncstorevi8,
9761                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9762defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9763                                  SchedWriteVecTruncate, truncstore_s_vi8,
9764                                  masked_truncstore_s_vi8, X86vtruncs,
9765                                  X86vmtruncs>;
9766defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9767                                  SchedWriteVecTruncate, truncstore_us_vi8,
9768                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9769
9770defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9771                                  SchedWriteVecTruncate, truncstorevi16,
9772                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9773defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9774                                  SchedWriteVecTruncate, truncstore_s_vi16,
9775                                  masked_truncstore_s_vi16, X86vtruncs,
9776                                  X86vmtruncs>;
9777defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9778                                  select_truncus, SchedWriteVecTruncate,
9779                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9780                                  X86vtruncus, X86vmtruncus>;
9781
9782defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9783                                  SchedWriteVecTruncate, truncstorevi32,
9784                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9785defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9786                                  SchedWriteVecTruncate, truncstore_s_vi32,
9787                                  masked_truncstore_s_vi32, X86vtruncs,
9788                                  X86vmtruncs>;
9789defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9790                                  select_truncus, SchedWriteVecTruncate,
9791                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9792                                  X86vtruncus, X86vmtruncus>;
9793
9794defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9795                                  SchedWriteVecTruncate, truncstorevi8,
9796                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9797defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9798                                  SchedWriteVecTruncate, truncstore_s_vi8,
9799                                  masked_truncstore_s_vi8, X86vtruncs,
9800                                  X86vmtruncs>;
9801defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9802                                  select_truncus, SchedWriteVecTruncate,
9803                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9804                                  X86vtruncus, X86vmtruncus>;
9805
9806defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9807                                  SchedWriteVecTruncate, truncstorevi16,
9808                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9809defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9810                                  SchedWriteVecTruncate, truncstore_s_vi16,
9811                                  masked_truncstore_s_vi16, X86vtruncs,
9812                                  X86vmtruncs>;
9813defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9814                                  select_truncus, SchedWriteVecTruncate,
9815                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9816                                  X86vtruncus, X86vmtruncus>;
9817
9818defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9819                                  SchedWriteVecTruncate, truncstorevi8,
9820                                  masked_truncstorevi8, X86vtrunc,
9821                                  X86vmtrunc>;
9822defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9823                                  SchedWriteVecTruncate, truncstore_s_vi8,
9824                                  masked_truncstore_s_vi8, X86vtruncs,
9825                                  X86vmtruncs>;
9826defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9827                                  select_truncus, SchedWriteVecTruncate,
9828                                  truncstore_us_vi8, masked_truncstore_us_vi8,
9829                                  X86vtruncus, X86vmtruncus>;
9830
9831let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
9832def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9833         (v8i16 (EXTRACT_SUBREG
9834                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9835                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
9836def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9837         (v4i32 (EXTRACT_SUBREG
9838                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9839                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9840}
9841
9842let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
9843def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9844         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9845                                            VR256X:$src, sub_ymm))), sub_xmm))>;
9846}
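// With AVX512F/BWI but no VLX, 256-bit truncates are widened: the source is
// inserted into a 512-bit register, the 512-bit vpmov* is used, and the low
// 128 bits of the result are extracted.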
9847
9848// Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
9849multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9850                           X86VectorVTInfo DestInfo,
9851                           X86VectorVTInfo SrcInfo> {
9852  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9853                                 DestInfo.RC:$src0,
9854                                 SrcInfo.KRCWM:$mask)),
9855            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9856                                                 SrcInfo.KRCWM:$mask,
9857                                                 SrcInfo.RC:$src)>;
9858
9859  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9860                                 DestInfo.ImmAllZerosV,
9861                                 SrcInfo.KRCWM:$mask)),
9862            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9863                                                  SrcInfo.RC:$src)>;
9864}
9865
9866let Predicates = [HasVLX] in {
9867defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9868defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9869defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9870}
9871
9872let Predicates = [HasAVX512] in {
9873defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9874defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9875defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9876
9877defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9878defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9879defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9880
9881defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9882defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9883defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9884}
9885
9886multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9887              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9888              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9889  let ExeDomain = DestInfo.ExeDomain in {
9890  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9891                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
9892                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9893                  EVEX, Sched<[sched]>;
9894
9895  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9896                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9897                  (DestInfo.VT (LdFrag addr:$src))>,
9898                EVEX, Sched<[sched.Folded]>;
9899  }
9900}
9901
9902multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
9903          SDNode OpNode, SDNode InVecNode, string ExtTy,
9904          X86SchedWriteWidths sched,
9905          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9906  let Predicates = [HasVLX, HasBWI] in {
9907    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
9908                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9909                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;
9910
9911    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
9912                    v16i8x_info, i128mem, LdFrag, OpNode>,
9913                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
9914  }
9915  let Predicates = [HasBWI] in {
9916    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
9917                    v32i8x_info, i256mem, LdFrag, OpNode>,
9918                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
9919  }
9920}
9921
9922multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
9923          SDNode OpNode, SDNode InVecNode, string ExtTy,
9924          X86SchedWriteWidths sched,
9925          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9926  let Predicates = [HasVLX, HasAVX512] in {
9927    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9928                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9929                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;
9930
9931    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9932                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9933                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
9934  }
9935  let Predicates = [HasAVX512] in {
9936    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9937                   v16i8x_info, i128mem, LdFrag, OpNode>,
9938                         EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
9939  }
9940}
9941
9942multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
9943                              SDNode InVecNode, string ExtTy,
9944                              X86SchedWriteWidths sched,
9945                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9946  let Predicates = [HasVLX, HasAVX512] in {
9947    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9948                   v16i8x_info, i16mem, LdFrag, InVecNode>,
9949                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;
9950
9951    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9952                   v16i8x_info, i32mem, LdFrag, InVecNode>,
9953                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
9954  }
9955  let Predicates = [HasAVX512] in {
9956    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9957                   v16i8x_info, i64mem, LdFrag, InVecNode>,
9958                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
9959  }
9960}
9961
9962multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
9963         SDNode OpNode, SDNode InVecNode, string ExtTy,
9964         X86SchedWriteWidths sched,
9965         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9966  let Predicates = [HasVLX, HasAVX512] in {
9967    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
9968                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9969                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;
9970
9971    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
9972                   v8i16x_info, i128mem, LdFrag, OpNode>,
9973                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
9974  }
9975  let Predicates = [HasAVX512] in {
9976    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
9977                   v16i16x_info, i256mem, LdFrag, OpNode>,
9978                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
9979  }
9980}
9981
9982multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
9983         SDNode OpNode, SDNode InVecNode, string ExtTy,
9984         X86SchedWriteWidths sched,
9985         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9986  let Predicates = [HasVLX, HasAVX512] in {
9987    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
9988                   v8i16x_info, i32mem, LdFrag, InVecNode>,
9989                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;
9990
9991    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
9992                   v8i16x_info, i64mem, LdFrag, InVecNode>,
9993                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
9994  }
9995  let Predicates = [HasAVX512] in {
9996    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
9997                   v8i16x_info, i128mem, LdFrag, OpNode>,
9998                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
9999  }
10000}
10001
10002multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10003         SDNode OpNode, SDNode InVecNode, string ExtTy,
10004         X86SchedWriteWidths sched,
10005         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10006
10007  let Predicates = [HasVLX, HasAVX512] in {
10008    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10009                   v4i32x_info, i64mem, LdFrag, InVecNode>,
10010                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;
10011
10012    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10013                   v4i32x_info, i128mem, LdFrag, OpNode>,
10014                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
10015  }
10016  let Predicates = [HasAVX512] in {
10017    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10018                   v8i32x_info, i256mem, LdFrag, OpNode>,
10019                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
10020  }
10021}
10022
10023defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10024defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10025defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10026defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10027defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10028defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10029
10030defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10031defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10032defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10033defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10034defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10035defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10036
10037
10038// Patterns for which we also need any-extend versions. aext_vector_inreg
10039// is currently legalized to zext_vector_inreg.
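// For example, (v8i32 (zext (loadv8i16 addr:$src))) is matched by one of the
// 256-bit patterns below and selected to VPMOVZXWDZ256rm (illustrative; the
// sext instantiation follows the same scheme).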
10040multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10041  // 256-bit patterns
10042  let Predicates = [HasVLX, HasBWI] in {
10043    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10044              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10045  }
10046
10047  let Predicates = [HasVLX] in {
10048    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10049              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10050
10051    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10052              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10053  }
10054
10055  // 512-bit patterns
10056  let Predicates = [HasBWI] in {
10057    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10058              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10059  }
10060  let Predicates = [HasAVX512] in {
10061    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10062              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10063    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10064              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10065
10066    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10067              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10068
10069    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10070              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10071  }
10072}
10073
10074multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10075                                 SDNode InVecOp> :
10076    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10077  // 128-bit patterns
10078  let Predicates = [HasVLX, HasBWI] in {
10079  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10080            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10081  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10082            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10083  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10084            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10085  }
10086  let Predicates = [HasVLX] in {
10087  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10088            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10089  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10090            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10091
10092  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10093            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10094
10095  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10096            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10097  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10098            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10099  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10100            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10101
10102  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10103            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10104  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10105            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10106
10107  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10108            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10109  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10110            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10111  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10112            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10113  }
10114  let Predicates = [HasVLX] in {
10115  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10116            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10117  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10118            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10119  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10120            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10121
10122  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10123            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10124  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10125            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10126
10127  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10128            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10129  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
10130            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10131  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10132            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10133  }
10134  // 512-bit patterns
10135  let Predicates = [HasAVX512] in {
10136  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10137            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10138  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10139            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10140  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10141            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10142  }
10143}
10144
10145defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10146defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10147
10148// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10149// ext+trunc aggressively, making it impossible to legalize the DAG to this
10150// pattern directly.
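// For illustration, the register form below selects roughly the sequence
//   vpmovzxwd %ymm0, %zmm0
//   vpmovdb   %zmm0, %xmm0
// (a sketch, not literal compiler output).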
10151let Predicates = [HasAVX512, NoBWI] in {
10152def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10153         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10154def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10155         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10156}
10157
10158//===----------------------------------------------------------------------===//
10159// GATHER - SCATTER Operations
10160
10161// FIXME: Improve scheduling of gather/scatter instructions.
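// An AVX-512 gather both reads and updates its mask register (mask bits are
// cleared as elements are gathered), so $mask is tied to the $mask_wb output
// and the destination is marked @earlyclobber below. Illustrative assembly:
//   vpgatherdd (%rax,%zmm1,4), %zmm0 {%k1}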
10162multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10163                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10164  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10165      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10166  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10167            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10168            !strconcat(OpcodeStr#_.Suffix,
10169            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10170            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10171            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10172}
10173
10174multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10175                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10176  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10177                                      vy512xmem>, EVEX_V512, REX_W;
10178  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10179                                      vz512mem>, EVEX_V512, REX_W;
10180let Predicates = [HasVLX] in {
10181  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10182                              vx256xmem>, EVEX_V256, REX_W;
10183  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10184                              vy256xmem>, EVEX_V256, REX_W;
10185  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10186                              vx128xmem>, EVEX_V128, REX_W;
10187  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10188                              vx128xmem>, EVEX_V128, REX_W;
10189}
10190}
10191
10192multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10193                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10194  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10195                                       EVEX_V512;
10196  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10197                                       EVEX_V512;
10198let Predicates = [HasVLX] in {
10199  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10200                                          vy256xmem>, EVEX_V256;
10201  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10202                                          vy128xmem>, EVEX_V256;
10203  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10204                                          vx128xmem>, EVEX_V128;
10205  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10206                                          vx64xmem, VK2WM>, EVEX_V128;
10207}
10208}
10209
10210
10211defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10212               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10213
10214defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10215                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10216
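// Scatters likewise consume and update the mask register but produce no
// vector result, so only the mask writeback is modeled as an output below.
// Illustrative assembly:
//   vpscatterdd %zmm0, (%rax,%zmm1,4) {%k1}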
10217multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10218                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10219
10220let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10221    hasSideEffects = 0 in
10222
10223  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10224            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10225            !strconcat(OpcodeStr#_.Suffix,
10226            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10227            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10228            Sched<[WriteStore]>;
10229}
10230
10231multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10232                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10233  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10234                                      vy512xmem>, EVEX_V512, REX_W;
10235  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10236                                      vz512mem>, EVEX_V512, REX_W;
10237let Predicates = [HasVLX] in {
10238  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10239                              vx256xmem>, EVEX_V256, REX_W;
10240  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10241                              vy256xmem>, EVEX_V256, REX_W;
10242  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10243                              vx128xmem>, EVEX_V128, REX_W;
10244  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10245                              vx128xmem>, EVEX_V128, REX_W;
10246}
10247}
10248
10249multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10250                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10251  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10252                                       EVEX_V512;
10253  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10254                                       EVEX_V512;
10255let Predicates = [HasVLX] in {
10256  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10257                                          vy256xmem>, EVEX_V256;
10258  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10259                                          vy128xmem>, EVEX_V256;
10260  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10261                                          vx128xmem>, EVEX_V128;
10262  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10263                                          vx64xmem, VK2WM>, EVEX_V128;
10264}
10265}
10266
10267defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10268               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10269
10270defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10271                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10272
10273// Gather/scatter prefetch instructions (AVX-512 PF).
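// These take only a mask and a vector memory operand and prefetch the
// addressed elements rather than loading them. Illustrative assembly:
//   vgatherpf0dps (%rax,%zmm0,4) {%k1}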
10274multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10275                       RegisterClass KRC, X86MemOperand memop> {
10276  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10277  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10278            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10279            EVEX, EVEX_K, Sched<[WriteLoad]>;
10280}
10281
10282defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10283                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10284
10285defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10286                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10287
10288defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10289                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10290
10291defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10292                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10293
10294defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10295                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10296
10297defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10298                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10299
10300defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10301                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10302
10303defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10304                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10305
10306defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10307                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10308
10309defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10310                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10311
10312defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10313                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10314
10315defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10316                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10317
10318defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10319                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10320
10321defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10322                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10323
10324defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10325                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10326
10327defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10328                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10329
10330multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10331def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10332                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10333                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10334                  EVEX, Sched<[Sched]>;
10335}
10336
10337multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10338                                 string OpcodeStr, Predicate prd> {
10339let Predicates = [prd] in
10340  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10341
10342  let Predicates = [prd, HasVLX] in {
10343    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10344    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10345  }
10346}
10347
10348defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10349defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10350defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10351defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
10352
10353multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10354    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10355                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10356                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10357                        EVEX, Sched<[WriteMove]>;
10358}
10359
10360// Use the 512-bit version to implement 128/256-bit when VLX is not available.
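// The pattern below widens the 128/256-bit source into an undef 512-bit
// register with INSERT_SUBREG, runs the 512-bit instruction, and copies the
// resulting mask into the narrower mask register class.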
10361multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10362                                           X86VectorVTInfo _,
10363                                           string Name> {
10364
10365  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10366            (_.KVT (COPY_TO_REGCLASS
10367                     (!cast<Instruction>(Name#"Zrr")
10368                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10369                                      _.RC:$src, _.SubRegIdx)),
10370                   _.KRC))>;
10371}
10372
10373multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10374                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10375  let Predicates = [prd] in
10376    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10377                                            EVEX_V512;
10378
10379  let Predicates = [prd, HasVLX] in {
10380    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10381                                              EVEX_V256;
10382    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10383                                               EVEX_V128;
10384  }
10385  let Predicates = [prd, NoVLX, HasEVEX512] in {
10386    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10387    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10388  }
10389}
10390
10391defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10392                                              avx512vl_i8_info, HasBWI>;
10393defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10394                                              avx512vl_i16_info, HasBWI>, REX_W;
10395defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10396                                              avx512vl_i32_info, HasDQI>;
10397defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10398                                              avx512vl_i64_info, HasDQI>, REX_W;
10399
10400// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10401// is available, but BWI is not. We can't handle this in lowering because
10402// a target-independent DAG combine likes to combine sext and trunc.
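// For illustration, (v16i8 (sext (v16i1 VK16:$src))) is selected below as a
// mask-to-vector move followed by a 512-bit truncate, roughly:
//   vpmovm2d %k1, %zmm0
//   vpmovdb  %zmm0, %xmm0
// (a sketch, not literal compiler output).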
10403let Predicates = [HasDQI, NoBWI] in {
10404  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10405            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10406  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10407            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10408}
10409
10410let Predicates = [HasDQI, NoBWI, HasVLX] in {
10411  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10412            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10413}
10414
10415//===----------------------------------------------------------------------===//
10416// AVX-512 - COMPRESS and EXPAND
10417//
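// VCOMPRESS* packs the elements selected by the mask into contiguous low
// positions of the destination (register or memory); VEXPAND* performs the
// inverse operation. Illustrative assembly:
//   vcompressps %zmm0, (%rax) {%k1}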
10418
10419multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10420                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10421  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10422              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10423              (null_frag)>, AVX5128IBase,
10424              Sched<[sched]>;
10425
10426  let mayStore = 1, hasSideEffects = 0 in
10427  def mr : AVX5128I<opc, MRMDestMem, (outs),
10428              (ins _.MemOp:$dst, _.RC:$src),
10429              OpcodeStr # "\t{$src, $dst|$dst, $src}",
10430              []>, EVEX_CD8<_.EltSize, CD8VT1>,
10431              Sched<[sched.Folded]>;
10432
10433  def mrk : AVX5128I<opc, MRMDestMem, (outs),
10434              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10435              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10436              []>,
10437              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10438              Sched<[sched.Folded]>;
10439}
10440
10441multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10442  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10443            (!cast<Instruction>(Name#_.ZSuffix#mrk)
10444                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10445
10446  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10447            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10448                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10449  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10450            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10451                            _.KRCWM:$mask, _.RC:$src)>;
10452}
10453
10454multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10455                                 X86FoldableSchedWrite sched,
10456                                 AVX512VLVectorVTInfo VTInfo,
10457                                 Predicate Pred = HasAVX512> {
10458  let Predicates = [Pred] in
10459  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10460           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10461
10462  let Predicates = [Pred, HasVLX] in {
10463    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10464                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10465    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10466                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10467  }
10468}
10469
10470// FIXME: Is there a better scheduler class for VPCOMPRESS?
10471defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10472                                          avx512vl_i32_info>, EVEX;
10473defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10474                                          avx512vl_i64_info>, EVEX, REX_W;
10475defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10476                                          avx512vl_f32_info>, EVEX;
10477defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10478                                          avx512vl_f64_info>, EVEX, REX_W;
10479
10480// expand
10481multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10482                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10483  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10484              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10485              (null_frag)>, AVX5128IBase,
10486              Sched<[sched]>;
10487
10488  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10489              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10490              (null_frag)>,
10491            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10492            Sched<[sched.Folded, sched.ReadAfterFold]>;
10493}
10494
10495multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10496
10497  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10498            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10499                                        _.KRCWM:$mask, addr:$src)>;
10500
10501  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10502            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10503                                        _.KRCWM:$mask, addr:$src)>;
10504
10505  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10506                                               (_.VT _.RC:$src0))),
10507            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10508                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10509
10510  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10511            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10512                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10513  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10514            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10515                            _.KRCWM:$mask, _.RC:$src)>;
10516}
10517
10518multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10519                               X86FoldableSchedWrite sched,
10520                               AVX512VLVectorVTInfo VTInfo,
10521                               Predicate Pred = HasAVX512> {
10522  let Predicates = [Pred] in
10523  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10524           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10525
10526  let Predicates = [Pred, HasVLX] in {
10527    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10528                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10529    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10530                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10531  }
10532}
10533
10534// FIXME: Is there a better scheduler class for VPEXPAND?
10535defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10536                                      avx512vl_i32_info>, EVEX;
10537defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10538                                      avx512vl_i64_info>, EVEX, REX_W;
10539defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10540                                      avx512vl_f32_info>, EVEX;
10541defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10542                                      avx512vl_f64_info>, EVEX, REX_W;
10543
10544// Handle instruction  reg_vec1 = op(reg_vec,imm)
10545//                                op(mem_vec,imm)
10546//                                op(broadcast(eltVt),imm)
10547// All instructions are created with FROUND_CURRENT.
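// For illustration, the broadcast form corresponds to assembly such as
//   vrndscaleps $4, (%rax){1to16}, %zmm0
// (a sketch; VRNDSCALE is one of the users of this multiclass below).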
10548multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10549                                      SDPatternOperator OpNode,
10550                                      SDPatternOperator MaskOpNode,
10551                                      X86FoldableSchedWrite sched,
10552                                      X86VectorVTInfo _> {
10553  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10554  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10555                      (ins _.RC:$src1, i32u8imm:$src2),
10556                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10557                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10558                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10559                      Sched<[sched]>;
10560  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10561                    (ins _.MemOp:$src1, i32u8imm:$src2),
10562                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10563                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10564                            (i32 timm:$src2)),
10565                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10566                                (i32 timm:$src2))>,
10567                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10568  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10569                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10570                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10571                    "${src1}"#_.BroadcastStr#", $src2",
10572                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10573                            (i32 timm:$src2)),
10574                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10575                                (i32 timm:$src2))>, EVEX_B,
10576                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10577  }
10578}
10579
10580// Handle instruction  reg_vec1 = op(reg_vec,imm),{sae}
10581multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10582                                          SDNode OpNode, X86FoldableSchedWrite sched,
10583                                          X86VectorVTInfo _> {
10584  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10585  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10586                      (ins _.RC:$src1, i32u8imm:$src2),
10587                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10588                      "$src1, {sae}, $src2",
10589                      (OpNode (_.VT _.RC:$src1),
10590                              (i32 timm:$src2))>,
10591                      EVEX_B, Sched<[sched]>;
10592}
10593
10594multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10595            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10596            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10597            Predicate prd>{
10598  let Predicates = [prd] in {
10599    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10600                                           sched.ZMM, _.info512>,
10601                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10602                                               sched.ZMM, _.info512>, EVEX_V512;
10603  }
10604  let Predicates = [prd, HasVLX] in {
10605    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10606                                           sched.XMM, _.info128>, EVEX_V128;
10607    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10608                                           sched.YMM, _.info256>, EVEX_V256;
10609  }
10610}
10611
10612// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10613//                                op(reg_vec2,mem_vec,imm)
10614//                                op(reg_vec2,broadcast(eltVt),imm)
10615// All instructions are created with FROUND_CURRENT.
10616multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10617                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10618  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10619  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10620                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10621                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10622                      (OpNode (_.VT _.RC:$src1),
10623                              (_.VT _.RC:$src2),
10624                              (i32 timm:$src3))>,
10625                      Sched<[sched]>;
10626  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10627                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10628                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10629                    (OpNode (_.VT _.RC:$src1),
10630                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10631                            (i32 timm:$src3))>,
10632                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10633  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10634                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10635                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10636                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10637                    (OpNode (_.VT _.RC:$src1),
10638                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10639                            (i32 timm:$src3))>, EVEX_B,
10640                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10641  }
10642}
10643
10644// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10645//                                op(reg_vec2,mem_vec,imm)
10646multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10647                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10648                              X86VectorVTInfo SrcInfo>{
10649  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
10650  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10651                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10652                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10653                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10654                               (SrcInfo.VT SrcInfo.RC:$src2),
10655                               (i8 timm:$src3)))>,
10656                  Sched<[sched]>;
10657  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10658                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10659                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10660                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10661                             (SrcInfo.VT (bitconvert
10662                                                (SrcInfo.LdFrag addr:$src2))),
10663                             (i8 timm:$src3)))>,
10664                Sched<[sched.Folded, sched.ReadAfterFold]>;
10665  }
10666}
10667
10668// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10669//                                op(reg_vec2,mem_vec,imm)
10670//                                op(reg_vec2,broadcast(eltVt),imm)
10671multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10672                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10673  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10674
10675  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
10676  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10677                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10678                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10679                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10680                    (OpNode (_.VT _.RC:$src1),
10681                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10682                            (i8 timm:$src3))>, EVEX_B,
10683                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10684}
10685
10686// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10687//                                       op(reg_vec2,mem_scalar,imm)
10688multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10689                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10690  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10691  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10692                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10693                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10694                      (OpNode (_.VT _.RC:$src1),
10695                              (_.VT _.RC:$src2),
10696                              (i32 timm:$src3))>,
10697                      Sched<[sched]>;
10698  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10699                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10700                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10701                    (OpNode (_.VT _.RC:$src1),
10702                            (_.ScalarIntMemFrags addr:$src2),
10703                            (i32 timm:$src3))>,
10704                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10705  }
10706}
10707
10708// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10709multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10710                                    SDNode OpNode, X86FoldableSchedWrite sched,
10711                                    X86VectorVTInfo _> {
10712  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10713  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10714                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10715                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10716                      "$src1, $src2, {sae}, $src3",
10717                      (OpNode (_.VT _.RC:$src1),
10718                              (_.VT _.RC:$src2),
10719                              (i32 timm:$src3))>,
10720                      EVEX_B, Sched<[sched]>;
10721}
10722
10723// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
10724multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10725                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10726  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10727  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10728                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10729                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10730                      "$src1, $src2, {sae}, $src3",
10731                      (OpNode (_.VT _.RC:$src1),
10732                              (_.VT _.RC:$src2),
10733                              (i32 timm:$src3))>,
10734                      EVEX_B, Sched<[sched]>;
10735}
10736
10737multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10738            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10739            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10740  let Predicates = [prd] in {
10741    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10742                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10743                                  EVEX_V512;
10744
10745  }
10746  let Predicates = [prd, HasVLX] in {
10747    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10748                                  EVEX_V128;
10749    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10750                                  EVEX_V256;
10751  }
10752}
10753
10754multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10755                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10756                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10757  let Predicates = [Pred] in {
10758    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10759                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
10760  }
10761  let Predicates = [Pred, HasVLX] in {
10762    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10763                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
10764    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10765                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
10766  }
10767}
10768
10769multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10770                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10771                                  Predicate Pred = HasAVX512> {
10772  let Predicates = [Pred] in {
10773    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10774                                EVEX_V512;
10775  }
10776  let Predicates = [Pred, HasVLX] in {
10777    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10778                                EVEX_V128;
10779    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10780                                EVEX_V256;
10781  }
10782}
10783
10784multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10785                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10786                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10787  let Predicates = [prd] in {
10788     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10789              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10790  }
10791}
10792
10793multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10794                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10795                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10796                    X86SchedWriteWidths sched, Predicate prd>{
10797  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10798                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10799                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10800  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10801                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10802                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10803  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10804                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10805                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
10806}
10807
10808defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10809                              X86VReduce, X86VReduce, X86VReduceSAE,
10810                              SchedWriteFRnd, HasDQI>;
10811defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10812                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10813                              SchedWriteFRnd, HasAVX512>;
10814defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10815                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
10816                              SchedWriteFRnd, HasAVX512>;
10817
10818defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10819                                                0x50, X86VRange, X86VRangeSAE,
10820                                                SchedWriteFAdd, HasDQI>,
10821      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10822defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10823                                                0x50, X86VRange, X86VRangeSAE,
10824                                                SchedWriteFAdd, HasDQI>,
10825      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10826
10827defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10828      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10829      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10830defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10831      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10832      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10833
10834defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10835      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10836      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10837defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10838      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10839      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10840defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
10841      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
10842      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10843
10844defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10845      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10846      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
10847defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10848      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10849      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
10850defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
10851      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
10852      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;
10853
10854multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10855                                          X86FoldableSchedWrite sched,
10856                                          X86VectorVTInfo _,
10857                                          X86VectorVTInfo CastInfo> {
10858  let ExeDomain = _.ExeDomain in {
10859  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10860                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10861                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10862                  (_.VT (bitconvert
10863                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10864                                                  (i8 timm:$src3)))))>,
10865                  Sched<[sched]>;
10866  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10867                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10868                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10869                (_.VT
10870                 (bitconvert
10871                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
10872                                           (CastInfo.LdFrag addr:$src2),
10873                                           (i8 timm:$src3)))))>,
10874                Sched<[sched.Folded, sched.ReadAfterFold]>;
10875  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10876                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10877                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10878                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10879                    (_.VT
10880                     (bitconvert
10881                      (CastInfo.VT
10882                       (X86Shuf128 _.RC:$src1,
10883                                   (_.BroadcastLdFrag addr:$src2),
10884                                   (i8 timm:$src3)))))>, EVEX_B,
10885                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10886  }
10887}
10888
10889multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10890                                   AVX512VLVectorVTInfo _,
10891                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
10892  let Predicates = [HasAVX512] in
10893  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10894                                          _.info512, CastInfo.info512>, EVEX_V512;
10895
10896  let Predicates = [HasAVX512, HasVLX] in
10897  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10898                                             _.info256, CastInfo.info256>, EVEX_V256;
10899}
10900
10901defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10902      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10903defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10904      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10905defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10906      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
10907defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10908      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
10909
10910multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10911                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10912  let ExeDomain = _.ExeDomain in {
10913  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10914                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10915                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10916                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10917                  Sched<[sched]>;
10918  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10919                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10920                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10921                (_.VT (X86VAlign _.RC:$src1,
10922                                 (bitconvert (_.LdFrag addr:$src2)),
10923                                 (i8 timm:$src3)))>,
10924                Sched<[sched.Folded, sched.ReadAfterFold]>;
10925
10926  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10927                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10928                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10929                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
10930                   (X86VAlign _.RC:$src1,
10931                              (_.VT (_.BroadcastLdFrag addr:$src2)),
10932                              (i8 timm:$src3))>, EVEX_B,
10933                   Sched<[sched.Folded, sched.ReadAfterFold]>;
10934  }
10935}
10936
10937multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10938                                AVX512VLVectorVTInfo _> {
10939  let Predicates = [HasAVX512] in {
10940    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10941                                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
10942  }
10943  let Predicates = [HasAVX512, HasVLX] in {
10944    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10945                                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
10947    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10948                                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
10949  }
10950}
10951
10952defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10953                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10954defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10955                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10956                                   REX_W;
10957
10958defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10959                                         SchedWriteShuffle, avx512vl_i8_info,
10960                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10961
10962// Fragments to help convert valignq into masked valignd. Or valignq/valignd
10963// into vpalignr.
10964def ValignqImm32XForm : SDNodeXForm<timm, [{
10965  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10966}]>;
10967def ValignqImm8XForm : SDNodeXForm<timm, [{
10968  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10969}]>;
10970def ValigndImm8XForm : SDNodeXForm<timm, [{
10971  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10972}]>;
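// For example, a 128-bit "valignq $1" starts the result one qword into the
// concatenation of the two sources; the same shuffle is "valignd $2"
// (1 qword = 2 dwords) or "vpalignr $8" (1 qword = 8 bytes), and a
// "valignd $1" is likewise "vpalignr $4", which is why the transforms above
// scale the immediate by 2, 8 and 4 respectively.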
10973
10974multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10975                                        X86VectorVTInfo From, X86VectorVTInfo To,
10976                                        SDNodeXForm ImmXForm> {
10977  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10978                                 (bitconvert
10979                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10980                                                   timm:$src3))),
10981                                 To.RC:$src0)),
10982            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10983                                                  To.RC:$src1, To.RC:$src2,
10984                                                  (ImmXForm timm:$src3))>;
10985
10986  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10987                                 (bitconvert
10988                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10989                                                   timm:$src3))),
10990                                 To.ImmAllZerosV)),
10991            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10992                                                   To.RC:$src1, To.RC:$src2,
10993                                                   (ImmXForm timm:$src3))>;
10994
10995  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10996                                 (bitconvert
10997                                  (From.VT (OpNode From.RC:$src1,
10998                                                   (From.LdFrag addr:$src2),
10999                                           timm:$src3))),
11000                                 To.RC:$src0)),
11001            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11002                                                  To.RC:$src1, addr:$src2,
11003                                                  (ImmXForm timm:$src3))>;
11004
11005  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11006                                 (bitconvert
11007                                  (From.VT (OpNode From.RC:$src1,
11008                                                   (From.LdFrag addr:$src2),
11009                                           timm:$src3))),
11010                                 To.ImmAllZerosV)),
11011            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11012                                                   To.RC:$src1, addr:$src2,
11013                                                   (ImmXForm timm:$src3))>;
11014}
11015
11016multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11017                                           X86VectorVTInfo From,
11018                                           X86VectorVTInfo To,
11019                                           SDNodeXForm ImmXForm> :
11020      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11021  def : Pat<(From.VT (OpNode From.RC:$src1,
11022                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11023                             timm:$src3)),
11024            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11025                                                  (ImmXForm timm:$src3))>;
11026
11027  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11028                                 (bitconvert
11029                                  (From.VT (OpNode From.RC:$src1,
11030                                           (bitconvert
11031                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11032                                           timm:$src3))),
11033                                 To.RC:$src0)),
11034            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11035                                                   To.RC:$src1, addr:$src2,
11036                                                   (ImmXForm timm:$src3))>;
11037
11038  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11039                                 (bitconvert
11040                                  (From.VT (OpNode From.RC:$src1,
11041                                           (bitconvert
11042                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11043                                           timm:$src3))),
11044                                 To.ImmAllZerosV)),
11045            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11046                                                    To.RC:$src1, addr:$src2,
11047                                                    (ImmXForm timm:$src3))>;
11048}
11049
11050let Predicates = [HasAVX512] in {
11051  // For 512-bit we lower to the widest element type we can. So we only need
11052  // to handle converting valignq to valignd.
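  // E.g. a dword-masked valignq with immediate 3 is re-emitted below as a
  // masked valignd with immediate 6, so the k-register select folds into the
  // shuffle instead of needing a separate blend.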
11053  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11054                                         v16i32_info, ValignqImm32XForm>;
11055}
11056
11057let Predicates = [HasVLX] in {
11058  // For 128-bit we lower to the widest element type we can. So we only need
11059  // to handle converting valignq to valignd.
11060  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11061                                         v4i32x_info, ValignqImm32XForm>;
11062  // For 256-bit we lower to the widest element type we can. So we only need
11063  // to handle converting valignq to valignd.
11064  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11065                                         v8i32x_info, ValignqImm32XForm>;
11066}
11067
11068let Predicates = [HasVLX, HasBWI] in {
11069  // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR.
11070  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11071                                      v16i8x_info, ValignqImm8XForm>;
11072  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11073                                      v16i8x_info, ValigndImm8XForm>;
11074}
11075
11076defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11077                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11078                EVEX_CD8<8, CD8VF>;
11079
11080multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11081                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11082  let ExeDomain = _.ExeDomain in {
11083  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11084                    (ins _.RC:$src1), OpcodeStr,
11085                    "$src1", "$src1",
11086                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11087                    Sched<[sched]>;
11088
11089  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11090                  (ins _.MemOp:$src1), OpcodeStr,
11091                  "$src1", "$src1",
11092                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11093            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11094            Sched<[sched.Folded]>;
11095  }
11096}
11097
11098multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11099                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11100           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11101  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11102                  (ins _.ScalarMemOp:$src1), OpcodeStr,
11103                  "${src1}"#_.BroadcastStr,
11104                  "${src1}"#_.BroadcastStr,
11105                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11106             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11107             Sched<[sched.Folded]>;
11108}
11109
11110multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11111                              X86SchedWriteWidths sched,
11112                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11113  let Predicates = [prd] in
11114    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11115                             EVEX_V512;
11116
11117  let Predicates = [prd, HasVLX] in {
11118    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11119                              EVEX_V256;
11120    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11121                              EVEX_V128;
11122  }
11123}
11124
11125multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11126                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11127                               Predicate prd> {
11128  let Predicates = [prd] in
11129    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11130                              EVEX_V512;
11131
11132  let Predicates = [prd, HasVLX] in {
11133    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11134                                 EVEX_V256;
11135    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11136                                 EVEX_V128;
11137  }
11138}
11139
11140multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11141                                 SDNode OpNode, X86SchedWriteWidths sched,
11142                                 Predicate prd> {
11143  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11144                               avx512vl_i64_info, prd>, REX_W;
11145  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11146                               avx512vl_i32_info, prd>;
11147}
11148
11149multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11150                                 SDNode OpNode, X86SchedWriteWidths sched,
11151                                 Predicate prd> {
11152  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11153                              avx512vl_i16_info, prd>, WIG;
11154  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11155                              avx512vl_i8_info, prd>, WIG;
11156}
11157
11158multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11159                                  bits<8> opc_d, bits<8> opc_q,
11160                                  string OpcodeStr, SDNode OpNode,
11161                                  X86SchedWriteWidths sched> {
11162  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11163                                    HasAVX512>,
11164              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11165                                    HasBWI>;
11166}
11167
11168defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11169                                    SchedWriteVecALU>;
11170
11171// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
11172let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
11173  def : Pat<(v4i64 (abs VR256X:$src)),
11174            (EXTRACT_SUBREG
11175                (VPABSQZrr
11176                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11177             sub_ymm)>;
11178  def : Pat<(v2i64 (abs VR128X:$src)),
11179            (EXTRACT_SUBREG
11180                (VPABSQZrr
11181                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11182             sub_xmm)>;
11183}
11184
11185// Use 512bit version to implement 128/256 bit.
11186multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11187                                 AVX512VLVectorVTInfo _, Predicate prd> {
11188  let Predicates = [prd, NoVLX, HasEVEX512] in {
11189    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11190              (EXTRACT_SUBREG
11191                (!cast<Instruction>(InstrStr # "Zrr")
11192                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11193                                 _.info256.RC:$src1,
11194                                 _.info256.SubRegIdx)),
11195              _.info256.SubRegIdx)>;
11196
11197    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11198              (EXTRACT_SUBREG
11199                (!cast<Instruction>(InstrStr # "Zrr")
11200                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11201                                 _.info128.RC:$src1,
11202                                 _.info128.SubRegIdx)),
11203              _.info128.SubRegIdx)>;
11204  }
11205}
11206
11207defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11208                                        SchedWriteVecIMul, HasCDI>;
11209
11210// FIXME: Is there a better scheduler class for VPCONFLICT?
11211defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11212                                        SchedWriteVecALU, HasCDI>;
11213
11214// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11215defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11216defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11217
11218//===---------------------------------------------------------------------===//
11219// Counts number of ones - VPOPCNTD and VPOPCNTQ
11220//===---------------------------------------------------------------------===//
11221
11222// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11223defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11224                                     SchedWriteVecALU, HasVPOPCNTDQ>;
11225
11226defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11227defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11228
11229//===---------------------------------------------------------------------===//
11230// Replicate Single FP - MOVSHDUP and MOVSLDUP
11231//===---------------------------------------------------------------------===//
11232
11233multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11234                            X86SchedWriteWidths sched> {
11235  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11236                                      avx512vl_f32_info, HasAVX512>, TB, XS;
11237}
11238
11239defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11240                                  SchedWriteFShuffle>;
11241defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11242                                  SchedWriteFShuffle>;
11243
11244//===----------------------------------------------------------------------===//
11245// AVX-512 - MOVDDUP
11246//===----------------------------------------------------------------------===//
11247
11248multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11249                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11250  let ExeDomain = _.ExeDomain in {
11251  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11252                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
11253                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11254                   Sched<[sched]>;
11255  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11256                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11257                 (_.VT (_.BroadcastLdFrag addr:$src))>,
11258                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11259                 Sched<[sched.Folded]>;
11260  }
11261}
11262
11263multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11264                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11265  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11266                           VTInfo.info512>, EVEX_V512;
11267
11268  let Predicates = [HasAVX512, HasVLX] in {
11269    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11270                                VTInfo.info256>, EVEX_V256;
11271    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11272                                   VTInfo.info128>, EVEX_V128;
11273  }
11274}
11275
11276multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11277                          X86SchedWriteWidths sched> {
11278  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11279                                        avx512vl_f64_info>, TB, XD, REX_W;
11280}
11281
11282defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11283
11284let Predicates = [HasVLX] in {
11285def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11286          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11287
11288def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11289                        (v2f64 VR128X:$src0)),
11290          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11291                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11292def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11293                        immAllZerosV),
11294          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11295}
11296
11297//===----------------------------------------------------------------------===//
11298// AVX-512 - Unpack Instructions
11299//===----------------------------------------------------------------------===//
11300
11301let Uses = []<Register>, mayRaiseFPException = 0 in {
11302defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11303                                 SchedWriteFShuffleSizes, 0, 1>;
11304defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11305                                 SchedWriteFShuffleSizes>;
11306}
11307
11308defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11309                                       SchedWriteShuffle, HasBWI>;
11310defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11311                                       SchedWriteShuffle, HasBWI>;
11312defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11313                                       SchedWriteShuffle, HasBWI>;
11314defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11315                                       SchedWriteShuffle, HasBWI>;
11316
11317defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11318                                       SchedWriteShuffle, HasAVX512>;
11319defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11320                                       SchedWriteShuffle, HasAVX512>;
11321defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11322                                        SchedWriteShuffle, HasAVX512>;
11323defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11324                                        SchedWriteShuffle, HasAVX512>;
11325
11326//===----------------------------------------------------------------------===//
11327// AVX-512 - Extract & Insert Integer Instructions
11328//===----------------------------------------------------------------------===//
11329
11330multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11331                                                            X86VectorVTInfo _> {
11332  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11333              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11334              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11335              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11336                       addr:$dst)]>,
11337              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11338}
11339
11340multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11341  let Predicates = [HasBWI] in {
11342    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11343                  (ins _.RC:$src1, u8imm:$src2),
11344                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11345                  [(set GR32orGR64:$dst,
11346                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11347                  EVEX, TA, PD, Sched<[WriteVecExtract]>;
11348
11349    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
11350  }
11351}
11352
11353multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11354  let Predicates = [HasBWI] in {
11355    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11356                  (ins _.RC:$src1, u8imm:$src2),
11357                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11358                  [(set GR32orGR64:$dst,
11359                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11360                  EVEX, TB, PD, Sched<[WriteVecExtract]>;
11361
11362    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11363    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11364                   (ins _.RC:$src1, u8imm:$src2),
11365                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11366                   EVEX, TA, PD, Sched<[WriteVecExtract]>;
11367
11368    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
11369  }
11370}
11371
11372multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11373                                                            RegisterClass GRC> {
11374  let Predicates = [HasDQI] in {
11375    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11376                  (ins _.RC:$src1, u8imm:$src2),
11377                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11378                  [(set GRC:$dst,
11379                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11380                  EVEX, TA, PD, Sched<[WriteVecExtract]>;
11381
11382    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11383                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11384                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11385                [(store (extractelt (_.VT _.RC:$src1),
11386                                    imm:$src2),addr:$dst)]>,
11387                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
11388                Sched<[WriteVecExtractSt]>;
11389  }
11390}
11391
11392defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11393defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11394defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11395defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
11396
11397multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11398                                            X86VectorVTInfo _, PatFrag LdFrag,
11399                                            SDPatternOperator immoperator> {
11400  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11401      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11402      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11403      [(set _.RC:$dst,
11404          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11405      EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11406}
11407
11408multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11409                                            X86VectorVTInfo _, PatFrag LdFrag> {
11410  let Predicates = [HasBWI] in {
11411    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11412        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11413        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11414        [(set _.RC:$dst,
11415            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
11416        Sched<[WriteVecInsert]>;
11417
11418    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11419  }
11420}
11421
11422multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11423                                         X86VectorVTInfo _, RegisterClass GRC> {
11424  let Predicates = [HasDQI] in {
11425    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11426        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11427        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11428        [(set _.RC:$dst,
11429            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11430        EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;
11431
11432    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11433                                    _.ScalarLdFrag, imm>, TA, PD;
11434  }
11435}
11436
11437defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11438                                     extloadi8>, TA, PD, WIG;
11439defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11440                                     extloadi16>, TB, PD, WIG;
11441defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11442defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
11443
11444let Predicates = [HasAVX512, NoBWI] in {
11445  def : Pat<(X86pinsrb VR128:$src1,
11446                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11447                       timm:$src3),
11448            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11449                       timm:$src3)>;
11450}
11451
11452let Predicates = [HasBWI] in {
11453  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11454            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11455                        GR8:$src2, sub_8bit), timm:$src3)>;
11456  def : Pat<(X86pinsrb VR128:$src1,
11457                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11458                       timm:$src3),
11459            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11460                        timm:$src3)>;
11461}
11462
11463// Always select FP16 instructions if available.
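// The negative AddedComplexity below keeps these integer-based f16 load/store
// and bitconvert patterns from being chosen when the native AVX512FP16 move
// patterns are available.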
11464let Predicates = [HasBWI], AddedComplexity = -10 in {
11465  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11466  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11467  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11468  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11469}
11470
11471//===----------------------------------------------------------------------===//
11472// VSHUFPS - VSHUFPD Operations
11473//===----------------------------------------------------------------------===//
11474
11475multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11476  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11477                                    SchedWriteFShuffle>,
11478                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11479                                    TA, EVEX, VVVV;
11480}
11481
11482defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
11483defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;
11484
11485//===----------------------------------------------------------------------===//
11486// AVX-512 - Byte shift Left/Right
11487//===----------------------------------------------------------------------===//
11488
11489multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11490                               Format MRMm, string OpcodeStr,
11491                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11492  def ri : AVX512<opc, MRMr,
11493             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11494             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11495             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11496             Sched<[sched]>;
11497  def mi : AVX512<opc, MRMm,
11498           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11499           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11500           [(set _.RC:$dst,(_.VT (OpNode
11501                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11502                                 (i8 timm:$src2))))]>,
11503           Sched<[sched.Folded, sched.ReadAfterFold]>;
11504}
11505
11506multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11507                                   Format MRMm, string OpcodeStr,
11508                                   X86SchedWriteWidths sched, Predicate prd>{
11509  let Predicates = [prd] in
11510    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11511                                 sched.ZMM, v64i8_info>, EVEX_V512;
11512  let Predicates = [prd, HasVLX] in {
11513    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11514                                    sched.YMM, v32i8x_info>, EVEX_V256;
11515    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11516                                    sched.XMM, v16i8x_info>, EVEX_V128;
11517  }
11518}
11519defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11520                                       SchedWriteShuffle, HasBWI>,
11521                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
11522defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11523                                       SchedWriteShuffle, HasBWI>,
11524                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
11525
11526multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11527                                string OpcodeStr, X86FoldableSchedWrite sched,
11528                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11529  let isCommutable = 1 in
11530  def rr : AVX512BI<opc, MRMSrcReg,
11531             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11532             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11533             [(set _dst.RC:$dst,(_dst.VT
11534                                (OpNode (_src.VT _src.RC:$src1),
11535                                        (_src.VT _src.RC:$src2))))]>,
11536             Sched<[sched]>;
11537  def rm : AVX512BI<opc, MRMSrcMem,
11538           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11539           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11540           [(set _dst.RC:$dst,(_dst.VT
11541                              (OpNode (_src.VT _src.RC:$src1),
11542                              (_src.VT (bitconvert
11543                                        (_src.LdFrag addr:$src2))))))]>,
11544           Sched<[sched.Folded, sched.ReadAfterFold]>;
11545}
11546
11547multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11548                                    string OpcodeStr, X86SchedWriteWidths sched,
11549                                    Predicate prd> {
11550  let Predicates = [prd] in
11551    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11552                                  v8i64_info, v64i8_info>, EVEX_V512;
11553  let Predicates = [prd, HasVLX] in {
11554    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11555                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11556    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11557                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11558  }
11559}
11560
11561defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11562                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;
11563
11564// Transforms to swizzle an immediate to enable better matching when
11565// memory operand isn't in the right place.
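// The immediate is a 3-input truth table: bit (a<<2 | b<<1 | c) of the
// immediate holds the result produced when the corresponding bits of
// operands 0, 1 and 2 are a, b and c.  Reordering the operands therefore
// just permutes the truth-table bit positions, which is all the transforms
// below do.  For example, imm 0xCA (op0 ? op1 : op2) becomes 0xD8
// (op2 ? op1 : op0) under VPTERNLOG321_imm8, which swaps operands 0 and 2.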
11566def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11567  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11568  uint8_t Imm = N->getZExtValue();
11569  // Swap bits 1/4 and 3/6.
11570  uint8_t NewImm = Imm & 0xa5;
11571  if (Imm & 0x02) NewImm |= 0x10;
11572  if (Imm & 0x10) NewImm |= 0x02;
11573  if (Imm & 0x08) NewImm |= 0x40;
11574  if (Imm & 0x40) NewImm |= 0x08;
11575  return getI8Imm(NewImm, SDLoc(N));
11576}]>;
11577def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11578  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11579  uint8_t Imm = N->getZExtValue();
11580  // Swap bits 2/4 and 3/5.
11581  uint8_t NewImm = Imm & 0xc3;
11582  if (Imm & 0x04) NewImm |= 0x10;
11583  if (Imm & 0x10) NewImm |= 0x04;
11584  if (Imm & 0x08) NewImm |= 0x20;
11585  if (Imm & 0x20) NewImm |= 0x08;
11586  return getI8Imm(NewImm, SDLoc(N));
11587}]>;
11588def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11589  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11590  uint8_t Imm = N->getZExtValue();
11591  // Swap bits 1/2 and 5/6.
11592  uint8_t NewImm = Imm & 0x99;
11593  if (Imm & 0x02) NewImm |= 0x04;
11594  if (Imm & 0x04) NewImm |= 0x02;
11595  if (Imm & 0x20) NewImm |= 0x40;
11596  if (Imm & 0x40) NewImm |= 0x20;
11597  return getI8Imm(NewImm, SDLoc(N));
11598}]>;
11599def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11600  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11601  uint8_t Imm = N->getZExtValue();
11602  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11603  uint8_t NewImm = Imm & 0x81;
11604  if (Imm & 0x02) NewImm |= 0x04;
11605  if (Imm & 0x04) NewImm |= 0x10;
11606  if (Imm & 0x08) NewImm |= 0x40;
11607  if (Imm & 0x10) NewImm |= 0x02;
11608  if (Imm & 0x20) NewImm |= 0x08;
11609  if (Imm & 0x40) NewImm |= 0x20;
11610  return getI8Imm(NewImm, SDLoc(N));
11611}]>;
11612def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11613  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11614  uint8_t Imm = N->getZExtValue();
11615  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11616  uint8_t NewImm = Imm & 0x81;
11617  if (Imm & 0x02) NewImm |= 0x10;
11618  if (Imm & 0x04) NewImm |= 0x02;
11619  if (Imm & 0x08) NewImm |= 0x20;
11620  if (Imm & 0x10) NewImm |= 0x04;
11621  if (Imm & 0x20) NewImm |= 0x40;
11622  if (Imm & 0x40) NewImm |= 0x08;
11623  return getI8Imm(NewImm, SDLoc(N));
11624}]>;
11625
11626multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11627                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11628                          string Name>{
11629  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11630  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11631                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11632                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11633                      (OpNode (_.VT _.RC:$src1),
11634                              (_.VT _.RC:$src2),
11635                              (_.VT _.RC:$src3),
11636                              (i8 timm:$src4)), 1, 1>,
11637                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
11638  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11639                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11640                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11641                    (OpNode (_.VT _.RC:$src1),
11642                            (_.VT _.RC:$src2),
11643                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11644                            (i8 timm:$src4)), 1, 0>,
11645                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11646                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11647  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11648                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11649                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11650                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11651                    (OpNode (_.VT _.RC:$src1),
11652                            (_.VT _.RC:$src2),
11653                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11654                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11655                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
11656                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11657  }// Constraints = "$src1 = $dst"
11658
11659  // Additional patterns for matching passthru operand in other positions.
11660  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11661                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11662                   _.RC:$src1)),
11663            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11664             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11665  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11666                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11667                   _.RC:$src1)),
11668            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11669             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11670
11671  // Additional patterns for matching zero masking with loads in other
11672  // positions.
11673  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11674                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11675                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11676                   _.ImmAllZerosV)),
11677            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11678             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11679  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11680                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11681                    _.RC:$src2, (i8 timm:$src4)),
11682                   _.ImmAllZerosV)),
11683            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11684             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11685
11686  // Additional patterns for matching masked loads with different
11687  // operand orders.
11688  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11689                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11690                    _.RC:$src2, (i8 timm:$src4)),
11691                   _.RC:$src1)),
11692            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11693             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11694  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11695                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11696                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11697                   _.RC:$src1)),
11698            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11699             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11700  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11701                   (OpNode _.RC:$src2, _.RC:$src1,
11702                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11703                   _.RC:$src1)),
11704            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11705             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11706  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11707                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11708                    _.RC:$src1, (i8 timm:$src4)),
11709                   _.RC:$src1)),
11710            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11711             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11712  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11713                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11714                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11715                   _.RC:$src1)),
11716            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11717             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11718
11719  // Additional patterns for matching zero masking with broadcasts in other
11720  // positions.
11721  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11722                   (OpNode (_.BroadcastLdFrag addr:$src3),
11723                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11724                   _.ImmAllZerosV)),
11725            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11726             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11727             (VPTERNLOG321_imm8 timm:$src4))>;
11728  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11729                   (OpNode _.RC:$src1,
11730                    (_.BroadcastLdFrag addr:$src3),
11731                    _.RC:$src2, (i8 timm:$src4)),
11732                   _.ImmAllZerosV)),
11733            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11734             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11735             (VPTERNLOG132_imm8 timm:$src4))>;
11736
11737  // Additional patterns for matching masked broadcasts with different
11738  // operand orders.
11739  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11740                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11741                    _.RC:$src2, (i8 timm:$src4)),
11742                   _.RC:$src1)),
11743            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11744             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11745  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11746                   (OpNode (_.BroadcastLdFrag addr:$src3),
11747                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11748                   _.RC:$src1)),
11749            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11750             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11751  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11752                   (OpNode _.RC:$src2, _.RC:$src1,
11753                    (_.BroadcastLdFrag addr:$src3),
11754                    (i8 timm:$src4)), _.RC:$src1)),
11755            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11756             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11757  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11758                   (OpNode _.RC:$src2,
11759                    (_.BroadcastLdFrag addr:$src3),
11760                    _.RC:$src1, (i8 timm:$src4)),
11761                   _.RC:$src1)),
11762            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11763             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11764  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11765                   (OpNode (_.BroadcastLdFrag addr:$src3),
11766                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11767                   _.RC:$src1)),
11768            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11769             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11770}
11771
11772multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11773                                 AVX512VLVectorVTInfo _> {
11774  let Predicates = [HasAVX512] in
11775    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11776                               _.info512, NAME>, EVEX_V512;
11777  let Predicates = [HasAVX512, HasVLX] in {
11778    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11779                               _.info128, NAME>, EVEX_V128;
11780    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11781                               _.info256, NAME>, EVEX_V256;
11782  }
11783}
11784
11785defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11786                                        avx512vl_i32_info>;
11787defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11788                                        avx512vl_i64_info>, REX_W;
11789
11790// Patterns to implement vnot using vpternlog instead of creating all ones
11791// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11792// so that the result is only dependent on src0. But we use the same source
11793// for all operands to prevent a false dependency.
11794// TODO: We should maybe have a more generalized algorithm for folding to
11795// vpternlog.
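// Immediate 15 (0x0F) sets exactly the truth-table rows in which the bit of
// operand 0 is zero, so the result is the complement of operand 0 and the
// other two operands are don't-cares.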
11796let Predicates = [HasAVX512] in {
11797  def : Pat<(v64i8 (vnot VR512:$src)),
11798            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11799  def : Pat<(v32i16 (vnot VR512:$src)),
11800            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11801  def : Pat<(v16i32 (vnot VR512:$src)),
11802            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11803  def : Pat<(v8i64 (vnot VR512:$src)),
11804            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11805}
11806
11807let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
11808  def : Pat<(v16i8 (vnot VR128X:$src)),
11809            (EXTRACT_SUBREG
11810             (VPTERNLOGQZrri
11811              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11812              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11813              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11814              (i8 15)), sub_xmm)>;
11815  def : Pat<(v8i16 (vnot VR128X:$src)),
11816            (EXTRACT_SUBREG
11817             (VPTERNLOGQZrri
11818              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11819              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11820              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11821              (i8 15)), sub_xmm)>;
11822  def : Pat<(v4i32 (vnot VR128X:$src)),
11823            (EXTRACT_SUBREG
11824             (VPTERNLOGQZrri
11825              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11826              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11827              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11828              (i8 15)), sub_xmm)>;
11829  def : Pat<(v2i64 (vnot VR128X:$src)),
11830            (EXTRACT_SUBREG
11831             (VPTERNLOGQZrri
11832              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11833              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11834              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11835              (i8 15)), sub_xmm)>;
11836
11837  def : Pat<(v32i8 (vnot VR256X:$src)),
11838            (EXTRACT_SUBREG
11839             (VPTERNLOGQZrri
11840              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11841              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11842              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11843              (i8 15)), sub_ymm)>;
11844  def : Pat<(v16i16 (vnot VR256X:$src)),
11845            (EXTRACT_SUBREG
11846             (VPTERNLOGQZrri
11847              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11848              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11849              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11850              (i8 15)), sub_ymm)>;
11851  def : Pat<(v8i32 (vnot VR256X:$src)),
11852            (EXTRACT_SUBREG
11853             (VPTERNLOGQZrri
11854              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11855              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11856              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11857              (i8 15)), sub_ymm)>;
11858  def : Pat<(v4i64 (vnot VR256X:$src)),
11859            (EXTRACT_SUBREG
11860             (VPTERNLOGQZrri
11861              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11862              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11863              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11864              (i8 15)), sub_ymm)>;
11865}
11866
11867let Predicates = [HasVLX] in {
11868  def : Pat<(v16i8 (vnot VR128X:$src)),
11869            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11870  def : Pat<(v8i16 (vnot VR128X:$src)),
11871            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11872  def : Pat<(v4i32 (vnot VR128X:$src)),
11873            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11874  def : Pat<(v2i64 (vnot VR128X:$src)),
11875            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11876
11877  def : Pat<(v32i8 (vnot VR256X:$src)),
11878            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11879  def : Pat<(v16i16 (vnot VR256X:$src)),
11880            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11881  def : Pat<(v8i32 (vnot VR256X:$src)),
11882            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11883  def : Pat<(v4i64 (vnot VR256X:$src)),
11884            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11885}
11886
11887//===----------------------------------------------------------------------===//
11888// AVX-512 - FixupImm
11889//===----------------------------------------------------------------------===//
11890
11891multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11892                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11893                                  X86VectorVTInfo TblVT>{
11894  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11895      Uses = [MXCSR], mayRaiseFPException = 1 in {
11896    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11897                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11898                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11899                        (X86VFixupimm (_.VT _.RC:$src1),
11900                                      (_.VT _.RC:$src2),
11901                                      (TblVT.VT _.RC:$src3),
11902                                      (i32 timm:$src4))>, Sched<[sched]>;
11903    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11904                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11905                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11906                      (X86VFixupimm (_.VT _.RC:$src1),
11907                                    (_.VT _.RC:$src2),
11908                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11909                                    (i32 timm:$src4))>,
11910                      Sched<[sched.Folded, sched.ReadAfterFold]>;
11911    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11912                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11913                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11914                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11915                      (X86VFixupimm (_.VT _.RC:$src1),
11916                                    (_.VT _.RC:$src2),
11917                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11918                                    (i32 timm:$src4))>,
11919                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11920  } // Constraints = "$src1 = $dst"
11921}
11922
11923multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11924                                      X86FoldableSchedWrite sched,
11925                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
11926  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11927let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11928  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11929                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11930                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11931                      "$src2, $src3, {sae}, $src4",
11932                      (X86VFixupimmSAE (_.VT _.RC:$src1),
11933                                       (_.VT _.RC:$src2),
11934                                       (TblVT.VT _.RC:$src3),
11935                                       (i32 timm:$src4))>,
11936                      EVEX_B, Sched<[sched]>;
11937  }
11938}
11939
11940multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11941                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
11942                                  X86VectorVTInfo _src3VT> {
11943  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11944      ExeDomain = _.ExeDomain in {
11945    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11946                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11947                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11948                      (X86VFixupimms (_.VT _.RC:$src1),
11949                                     (_.VT _.RC:$src2),
11950                                     (_src3VT.VT _src3VT.RC:$src3),
11951                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
11952    let Uses = [MXCSR] in
11953    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11954                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11955                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11956                      "$src2, $src3, {sae}, $src4",
11957                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
11958                                        (_.VT _.RC:$src2),
11959                                        (_src3VT.VT _src3VT.RC:$src3),
11960                                        (i32 timm:$src4))>,
11961                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11962    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11963                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11964                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11965                     (X86VFixupimms (_.VT _.RC:$src1),
11966                                    (_.VT _.RC:$src2),
11967                                    (_src3VT.VT (scalar_to_vector
11968                                              (_src3VT.ScalarLdFrag addr:$src3))),
11969                                    (i32 timm:$src4))>,
11970                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11971  }
11972}
11973
11974multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11975                                      AVX512VLVectorVTInfo _Vec,
11976                                      AVX512VLVectorVTInfo _Tbl> {
11977  let Predicates = [HasAVX512] in
11978    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11979                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11980                                EVEX, VVVV, EVEX_V512;
11981  let Predicates = [HasAVX512, HasVLX] in {
11982    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11983                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11984                            EVEX, VVVV, EVEX_V128;
11985    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11986                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11987                            EVEX, VVVV, EVEX_V256;
11988  }
11989}
11990
11991defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11992                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11993                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
11994defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11995                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11996                          AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
11997defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11998                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11999defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12000                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
12001
12002// Patterns used to select SSE scalar fp arithmetic instructions from
12003// either:
12004//
12005// (1) a scalar fp operation followed by a blend
12006//
12007// The effect is that the backend no longer emits unnecessary vector
12008// insert instructions immediately after SSE scalar fp instructions
12009// like addss or mulss.
12010//
12011// For example, given the following code:
12012//   __m128 foo(__m128 A, __m128 B) {
12013//     A[0] += B[0];
12014//     return A;
12015//   }
12016//
12017// Previously we generated:
12018//   addss %xmm0, %xmm1
12019//   movss %xmm1, %xmm0
12020//
12021// We now generate:
12022//   addss %xmm1, %xmm0
12023//
12024// (2) a vector packed single/double fp operation followed by a vector insert
12025//
12026// The effect is that the backend converts the packed fp instruction
12027// followed by a vector insert into a single SSE scalar fp instruction.
12028//
12029// For example, given the following code:
12030//   __m128 foo(__m128 A, __m128 B) {
12031//     __m128 C = A + B;
12032//     return (__m128) {c[0], a[1], a[2], a[3]};
12033//   }
12034//
12035// Previously we generated:
12036//   addps %xmm0, %xmm1
12037//   movss %xmm1, %xmm0
12038//
12039// We now generate:
12040//   addss %xmm1, %xmm0
12041
12042// TODO: Some canonicalization in lowering would simplify the number of
12043// patterns we have to try to match.
12044multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12045                                          string OpcPrefix, SDNode MoveNode,
12046                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
12047  let Predicates = [HasAVX512] in {
12048    // extracted scalar math op with insert via movss
12049    def : Pat<(MoveNode
12050               (_.VT VR128X:$dst),
12051               (_.VT (scalar_to_vector
12052                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12053                          _.FRC:$src)))),
12054              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12055               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12056    def : Pat<(MoveNode
12057               (_.VT VR128X:$dst),
12058               (_.VT (scalar_to_vector
12059                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12060                          (_.ScalarLdFrag addr:$src))))),
12061              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12062
12063    // extracted masked scalar math op with insert via movss
12064    def : Pat<(MoveNode (_.VT VR128X:$src1),
12065               (scalar_to_vector
12066                (X86selects_mask VK1WM:$mask,
12067                            (MaskedOp (_.EltVT
12068                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12069                                      _.FRC:$src2),
12070                            _.FRC:$src0))),
12071              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12072               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12073               VK1WM:$mask, _.VT:$src1,
12074               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12075    def : Pat<(MoveNode (_.VT VR128X:$src1),
12076               (scalar_to_vector
12077                (X86selects_mask VK1WM:$mask,
12078                            (MaskedOp (_.EltVT
12079                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12080                                      (_.ScalarLdFrag addr:$src2)),
12081                            _.FRC:$src0))),
12082              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12083               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12084               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12085
12086    // extracted zero-masked scalar math op with insert via movss
12087    def : Pat<(MoveNode (_.VT VR128X:$src1),
12088               (scalar_to_vector
12089                (X86selects_mask VK1WM:$mask,
12090                            (MaskedOp (_.EltVT
12091                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12092                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12093      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12094          VK1WM:$mask, _.VT:$src1,
12095          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12096    def : Pat<(MoveNode (_.VT VR128X:$src1),
12097               (scalar_to_vector
12098                (X86selects_mask VK1WM:$mask,
12099                            (MaskedOp (_.EltVT
12100                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12101                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12102      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12103  }
12104}
12105
12106defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12107defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12108defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12109defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12110
12111defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12112defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12113defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12114defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12115
12116defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12117defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12118defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12119defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12120
12121multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12122                                             SDNode Move, X86VectorVTInfo _> {
12123  let Predicates = [HasAVX512] in {
12124    def : Pat<(_.VT (Move _.VT:$dst,
12125                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12126              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12127  }
12128}
12129
12130defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12131defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12132defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12133
12134//===----------------------------------------------------------------------===//
12135// AES instructions
12136//===----------------------------------------------------------------------===//
12137
12138multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12139  let Predicates = [HasVLX, HasVAES] in {
12140    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12141                                  !cast<Intrinsic>(IntPrefix),
12142                                  loadv2i64, 0, VR128X, i128mem>,
12143                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12144    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12145                                  !cast<Intrinsic>(IntPrefix#"_256"),
12146                                  loadv4i64, 0, VR256X, i256mem>,
12147                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12148  }
12149  let Predicates = [HasAVX512, HasVAES] in
12150    defm Z    : AESI_binop_rm_int<Op, OpStr,
12151                                  !cast<Intrinsic>(IntPrefix#"_512"),
12152                                  loadv8i64, 0, VR512, i512mem>,
12153                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12154}
12155
12156defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12157defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12158defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12159defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12160
12161//===----------------------------------------------------------------------===//
12162// PCLMUL instructions - Carry-less multiplication
12163//===----------------------------------------------------------------------===//
12164
12165let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12166defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12167                              EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12168
12169let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12170defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12171                              EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12172
12173defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12174                                int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
12175                                EVEX_CD8<64, CD8VF>, WIG;
12176}
12177
12178// Aliases
12179defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12180defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12181defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12182
12183//===----------------------------------------------------------------------===//
12184// VBMI2
12185//===----------------------------------------------------------------------===//
12186
12187multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12188                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12189  let Constraints = "$src1 = $dst",
12190      ExeDomain   = VTI.ExeDomain in {
12191    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12192                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12193                "$src3, $src2", "$src2, $src3",
12194                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12195                T8, PD, EVEX, VVVV, Sched<[sched]>;
12196    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12197                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12198                "$src3, $src2", "$src2, $src3",
12199                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12200                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12201                T8, PD, EVEX, VVVV,
12202                Sched<[sched.Folded, sched.ReadAfterFold]>;
12203  }
12204}
12205
12206multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12207                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12208         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12209  let Constraints = "$src1 = $dst",
12210      ExeDomain   = VTI.ExeDomain in
12211  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12212              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12213              "${src3}"#VTI.BroadcastStr#", $src2",
12214              "$src2, ${src3}"#VTI.BroadcastStr,
12215              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12216               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12217              T8, PD, EVEX, VVVV, EVEX_B,
12218              Sched<[sched.Folded, sched.ReadAfterFold]>;
12219}
12220
12221multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12222                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12223  let Predicates = [HasVBMI2] in
12224  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12225                                   EVEX_V512;
12226  let Predicates = [HasVBMI2, HasVLX] in {
12227    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12228                                   EVEX_V256;
12229    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12230                                   EVEX_V128;
12231  }
12232}
12233
12234multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12235                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12236  let Predicates = [HasVBMI2] in
12237  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12238                                    EVEX_V512;
12239  let Predicates = [HasVBMI2, HasVLX] in {
12240    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12241                                    EVEX_V256;
12242    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12243                                    EVEX_V128;
12244  }
12245}
12246multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12247                           SDNode OpNode, X86SchedWriteWidths sched> {
12248  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12249             avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12250  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12251             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12252  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12253             avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12254}
12255
12256multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12257                           SDNode OpNode, X86SchedWriteWidths sched> {
12258  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12259             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12260             REX_W, EVEX_CD8<16, CD8VF>;
12261  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12262             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
12263  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12264             sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
12265}
12266
12267// Concat & Shift
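// Each vpshld*/vpshrd* forms a double-width value from a pair of corresponding
// source elements and shifts it: the shld forms shift left and keep the upper
// half, the shrd forms shift right and keep the lower half. The *v variants
// take per-element shift counts instead of an immediate.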
12268defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12269defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12270defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12271defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12272
12273// Compress
12274defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12275                                         avx512vl_i8_info, HasVBMI2>, EVEX;
12276defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12277                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12278// Expand
12279defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12280                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12281defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12282                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12283
12284//===----------------------------------------------------------------------===//
12285// VNNI
12286//===----------------------------------------------------------------------===//
12287
12288let Constraints = "$src1 = $dst" in
12289multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12290                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12291                    bit IsCommutable> {
12292  let ExeDomain = VTI.ExeDomain in {
12293  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12294                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12295                                   "$src3, $src2", "$src2, $src3",
12296                                   (VTI.VT (OpNode VTI.RC:$src1,
12297                                            VTI.RC:$src2, VTI.RC:$src3)),
12298                                   IsCommutable, IsCommutable>,
12299                                   EVEX, VVVV, T8, PD, Sched<[sched]>;
12300  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12301                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12302                                   "$src3, $src2", "$src2, $src3",
12303                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12304                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12305                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
12306                                   Sched<[sched.Folded, sched.ReadAfterFold,
12307                                          sched.ReadAfterFold]>;
12308  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12309                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12310                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12311                                   "$src2, ${src3}"#VTI.BroadcastStr,
12312                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12313                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12314                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
12315                                   T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
12316                                                sched.ReadAfterFold]>;
12317  }
12318}
12319
12320multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12321                       X86SchedWriteWidths sched, bit IsCommutable> {
12322  let Predicates = [HasVNNI] in
12323  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12324                           IsCommutable>, EVEX_V512;
12325  let Predicates = [HasVNNI, HasVLX] in {
12326    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12327                           IsCommutable>, EVEX_V256;
12328    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12329                           IsCommutable>, EVEX_V128;
12330  }
12331}
12332
12333// FIXME: Is there a better scheduler class for VPDP?
12334defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12335defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12336defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12337defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12338
12339// Patterns to match VPDPWSSD from existing instructions/intrinsics.
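// For example (written with the usual Intel intrinsic spellings; the fold only
// applies when the vpmaddwd result has no other uses), code such as:
//   __m512i foo(__m512i acc, __m512i a, __m512i b) {
//     return _mm512_add_epi32(acc, _mm512_madd_epi16(a, b));
//   }
// is selected to a single vpdpwssd instead of vpmaddwd followed by vpaddd.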
12340let Predicates = [HasVNNI] in {
12341  def : Pat<(v16i32 (add VR512:$src1,
12342                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12343            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12344  def : Pat<(v16i32 (add VR512:$src1,
12345                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12346            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12347}
12348let Predicates = [HasVNNI,HasVLX] in {
12349  def : Pat<(v8i32 (add VR256X:$src1,
12350                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12351            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12352  def : Pat<(v8i32 (add VR256X:$src1,
12353                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12354            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12355  def : Pat<(v4i32 (add VR128X:$src1,
12356                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12357            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12358  def : Pat<(v4i32 (add VR128X:$src1,
12359                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12360            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12361}
12362
12363//===----------------------------------------------------------------------===//
12364// Bit Algorithms
12365//===----------------------------------------------------------------------===//
12366
12367// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12368defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12369                                   avx512vl_i8_info, HasBITALG>;
12370defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12371                                   avx512vl_i16_info, HasBITALG>, REX_W;
12372
12373defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12374defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12375
12376multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12377  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12378                                (ins VTI.RC:$src1, VTI.RC:$src2),
12379                                "vpshufbitqmb",
12380                                "$src2, $src1", "$src1, $src2",
12381                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12382                                (VTI.VT VTI.RC:$src2)),
12383                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12384                                (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
12385                                Sched<[sched]>;
12386  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12387                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12388                                "vpshufbitqmb",
12389                                "$src2, $src1", "$src1, $src2",
12390                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12391                                (VTI.VT (VTI.LdFrag addr:$src2))),
12392                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12393                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12394                                EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
12395                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12396}
12397
12398multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12399  let Predicates = [HasBITALG] in
12400  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12401  let Predicates = [HasBITALG, HasVLX] in {
12402    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12403    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12404  }
12405}
12406
12407// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12408defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12409
12410//===----------------------------------------------------------------------===//
12411// GFNI
12412//===----------------------------------------------------------------------===//
12413
12414multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12415                                   X86SchedWriteWidths sched> {
12416  let Predicates = [HasGFNI, HasAVX512] in
12417  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12418                                EVEX_V512;
12419  let Predicates = [HasGFNI, HasVLX] in {
12420    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12421                                EVEX_V256;
12422    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12423                                EVEX_V128;
12424  }
12425}
12426
12427defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12428                                          SchedWriteVecALU>,
12429                                          EVEX_CD8<8, CD8VF>, T8;
12430
12431multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12432                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12433                                      X86VectorVTInfo BcstVTI>
12434           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12435  let ExeDomain = VTI.ExeDomain in
12436  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12437                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
12438                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12439                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12440                (OpNode (VTI.VT VTI.RC:$src1),
12441                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12442                 (i8 timm:$src3))>, EVEX_B,
12443                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12444}
12445
12446multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12447                                     X86SchedWriteWidths sched> {
12448  let Predicates = [HasGFNI, HasAVX512] in
12449  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12450                                           v64i8_info, v8i64_info>, EVEX_V512;
12451  let Predicates = [HasGFNI, HasVLX] in {
12452    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12453                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12454    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12455                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12456  }
12457}
12458
12459defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12460                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12461                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12462defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12463                         X86GF2P8affineqb, SchedWriteVecIMul>,
12464                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12465
12466
12467//===----------------------------------------------------------------------===//
12468// AVX5124FMAPS
12469//===----------------------------------------------------------------------===//
12470
12471let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12472    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12473defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12474                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12475                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12476                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12477                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12478
12479defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12480                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12481                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12482                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12483                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12484
12485defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12486                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12487                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12488                    []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12489                    Sched<[SchedWriteFMA.Scl.Folded]>;
12490
12491defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12492                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12493                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12494                     []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
12495                     Sched<[SchedWriteFMA.Scl.Folded]>;
12496}
12497
12498//===----------------------------------------------------------------------===//
12499// AVX5124VNNIW
12500//===----------------------------------------------------------------------===//
12501
12502let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12503    Constraints = "$src1 = $dst" in {
12504defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12505                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12506                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12507                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12508                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12509
12510defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12511                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12512                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12513                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
12514                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12515}
12516
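// Pseudo instructions for storing and reloading a VK16PAIR mask-register pair,
// e.g. when spilling the result pair produced by VP2INTERSECT.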
12517let hasSideEffects = 0 in {
12518  let mayStore = 1, SchedRW = [WriteFStoreX] in
12519  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12520  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12521  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12522}
12523
12524//===----------------------------------------------------------------------===//
12525// VP2INTERSECT
12526//===----------------------------------------------------------------------===//
12527
12528multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12529  def rr : I<0x68, MRMSrcReg,
12530                  (outs _.KRPC:$dst),
12531                  (ins _.RC:$src1, _.RC:$src2),
12532                  !strconcat("vp2intersect", _.Suffix,
12533                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12534                  [(set _.KRPC:$dst, (X86vp2intersect
12535                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12536                  EVEX, VVVV, T8, XD, Sched<[sched]>;
12537
12538  def rm : I<0x68, MRMSrcMem,
12539                  (outs _.KRPC:$dst),
12540                  (ins  _.RC:$src1, _.MemOp:$src2),
12541                  !strconcat("vp2intersect", _.Suffix,
12542                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12543                  [(set _.KRPC:$dst, (X86vp2intersect
12544                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12545                  EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
12546                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12547
12548  def rmb : I<0x68, MRMSrcMem,
12549                  (outs _.KRPC:$dst),
12550                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12551                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12552                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12553                  [(set _.KRPC:$dst, (X86vp2intersect
12554                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12555                  EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12556                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12557}
12558
12559multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12560  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12561    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12562
12563  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12564    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12565    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12566  }
12567}
12568
12569let ExeDomain = SSEPackedInt in {
12570defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12571defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12572}
12573
12574multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12575                             X86SchedWriteWidths sched,
12576                             AVX512VLVectorVTInfo _SrcVTInfo,
12577                             AVX512VLVectorVTInfo _DstVTInfo,
12578                             SDNode OpNode, Predicate prd,
12579                             bit IsCommutable = 0> {
12580  let Predicates = [prd] in
12581    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12582                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12583                                   _SrcVTInfo.info512, IsCommutable>,
12584                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12585  let Predicates = [HasVLX, prd] in {
12586    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12587                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12588                                      _SrcVTInfo.info256, IsCommutable>,
12589                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12590    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12591                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12592                                      _SrcVTInfo.info128, IsCommutable>,
12593                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12594  }
12595}
12596
12597let ExeDomain = SSEPackedSingle in
12598defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12599                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12600                                        avx512vl_f32_info, avx512vl_bf16_info,
12601                                        X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;
12602
12603// Truncate Float to BFloat16
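// For example (assuming the usual AVX512-BF16 intrinsic spelling), a full-width
// truncation such as:
//   __m256bh foo(__m512 x) { return _mm512_cvtneps_pbh(x); }
// selects to the ZMM form of vcvtneps2bf16 defined below.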
12604multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12605                             X86SchedWriteWidths sched> {
12606  let ExeDomain = SSEPackedSingle in {
12607  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12608    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12609                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12610  }
12611  let Predicates = [HasBF16, HasVLX] in {
12612    let Uses = []<Register>, mayRaiseFPException = 0 in {
12613    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12614                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12615                               VK4WM>, EVEX_V128;
12616    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12617                               X86cvtneps2bf16, X86cvtneps2bf16,
12618                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12619    }
12620  } // Predicates = [HasBF16, HasVLX]
12621  } // ExeDomain = SSEPackedSingle
12622
12623  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12624                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12625                  VR128X:$src), 0>;
12626  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12627                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12628                  f128mem:$src), 0, "intel">;
12629  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12630                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12631                  VR256X:$src), 0>;
12632  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12633                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12634                  f256mem:$src), 0, "intel">;
12635}
12636
12637defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12638                                       SchedWriteCvtPD2PS>, T8, XS,
12639                                       EVEX_CD8<32, CD8VF>;
12640
12641let Predicates = [HasBF16, HasVLX] in {
12642  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12643  // patterns have been disabled with null_frag.
12644  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12645            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12646  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
12647                              VK4WM:$mask),
12648            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12649  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
12650                              VK4WM:$mask),
12651            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12652
12653  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12654            (VCVTNEPS2BF16Z128rm addr:$src)>;
12655  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
12656                              VK4WM:$mask),
12657            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12658  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
12659                              VK4WM:$mask),
12660            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12661
12662  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
12663                                     (X86VBroadcastld32 addr:$src)))),
12664            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12665  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12666                              (v8bf16 VR128X:$src0), VK4WM:$mask),
12667            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12668  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12669                              v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
12670            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12671
12672  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
12673            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12674  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
12675            (VCVTNEPS2BF16Z128rm addr:$src)>;
12676
12677  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
12678            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12679  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
12680            (VCVTNEPS2BF16Z256rm addr:$src)>;
12681
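  // bf16 broadcasts reuse the 16-bit integer broadcast instructions; a bf16
  // element has the same bit pattern as an i16, so the result is identical.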
12682  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
12683            (VPBROADCASTWZ128rm addr:$src)>;
12684  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
12685            (VPBROADCASTWZ256rm addr:$src)>;
12686
12687  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12688            (VPBROADCASTWZ128rr VR128X:$src)>;
12689  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12690            (VPBROADCASTWZ256rr VR128X:$src)>;
12691
12692  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
12693            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
12694  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
12695            (VCVTNEPS2BF16Z256rm addr:$src)>;
12696
12697  // TODO: No scalar broadcast, since we don't support a legal scalar bf16 type so far.
12698}
12699
12700let Predicates = [HasBF16] in {
12701  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
12702            (VPBROADCASTWZrm addr:$src)>;
12703
12704  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
12705            (VPBROADCASTWZrr VR128X:$src)>;
12706
12707  def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
12708            (VCVTNEPS2BF16Zrr VR512:$src)>;
12709  def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
12710            (VCVTNEPS2BF16Zrm addr:$src)>;
12711  // TODO: No scalar broadcast, since we don't support a legal scalar bf16 type so far.
12712}
12713
12714let Constraints = "$src1 = $dst" in {
12715multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12716                              X86FoldableSchedWrite sched,
12717                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12718  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12719                           (ins src_v.RC:$src2, src_v.RC:$src3),
12720                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12721                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12722                           EVEX, VVVV, Sched<[sched]>;
12723
12724  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12725                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12726                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12727                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12728                               (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
12729                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12730
12731  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12732                  (ins src_v.RC:$src2, f32mem:$src3),
12733                  OpcodeStr,
12734                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12735                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12736                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12737                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12738                  EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
12739
12740}
12741} // Constraints = "$src1 = $dst"
12742
12743multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12744                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12745                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12746  let Predicates = [prd] in {
12747    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12748                                   src_v.info512>, EVEX_V512;
12749  }
12750  let Predicates = [HasVLX, prd] in {
12751    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12752                                   src_v.info256>, EVEX_V256;
12753    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12754                                   src_v.info128>, EVEX_V128;
12755  }
12756}
12757
12758let ExeDomain = SSEPackedSingle in
12759defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12760                                       avx512vl_f32_info, avx512vl_bf16_info,
12761                                       HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;
12762
12763//===----------------------------------------------------------------------===//
12764// AVX512FP16
12765//===----------------------------------------------------------------------===//
12766
12767let Predicates = [HasFP16] in {
12768// Move word (r/m16) to packed word
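// The register forms below go through a 32-bit (or 64-bit) GPR and use only its
// low 16 bits; vmovw has no direct 16-bit register encoding.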
12769def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12770                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12771def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12772                      "vmovw\t{$src, $dst|$dst, $src}",
12773                      [(set VR128X:$dst,
12774                        (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12775                      T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12776
12777def : Pat<(f16 (bitconvert GR16:$src)),
12778          (f16 (COPY_TO_REGCLASS
12779                (VMOVW2SHrr
12780                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12781                FR16X))>;
12782def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12783          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12784def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12785          (VMOVW2SHrr GR32:$src)>;
12786// FIXME: We should really find a way to improve these patterns.
12787def : Pat<(v8i32 (X86vzmovl
12788                  (insert_subvector undef,
12789                                    (v4i32 (scalar_to_vector
12790                                            (and GR32:$src, 0xffff))),
12791                                    (iPTR 0)))),
12792          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12793def : Pat<(v16i32 (X86vzmovl
12794                   (insert_subvector undef,
12795                                     (v4i32 (scalar_to_vector
12796                                             (and GR32:$src, 0xffff))),
12797                                     (iPTR 0)))),
12798          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12799
12800def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
12801          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12802
12803// AVX 128-bit movw instruction writes zeros in the high 128-bit part.
12804def : Pat<(v8i16 (X86vzload16 addr:$src)),
12805          (VMOVWrm addr:$src)>;
12806def : Pat<(v16i16 (X86vzload16 addr:$src)),
12807          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12808
12809// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12810def : Pat<(v32i16 (X86vzload16 addr:$src)),
12811          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12812
12813def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12814          (VMOVWrm addr:$src)>;
12815def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12816          (VMOVWrm addr:$src)>;
12817def : Pat<(v8i32 (X86vzmovl
12818                  (insert_subvector undef,
12819                                    (v4i32 (scalar_to_vector
12820                                            (i32 (zextloadi16 addr:$src)))),
12821                                    (iPTR 0)))),
12822          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12823def : Pat<(v16i32 (X86vzmovl
12824                   (insert_subvector undef,
12825                                     (v4i32 (scalar_to_vector
12826                                             (i32 (zextloadi16 addr:$src)))),
12827                                     (iPTR 0)))),
12828          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12829
12830// Move word from xmm register to r/m16
12831def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12832                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12833def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12834                       (ins i16mem:$dst, VR128X:$src),
12835                       "vmovw\t{$src, $dst|$dst, $src}",
12836                       [(store (i16 (extractelt (v8i16 VR128X:$src),
12837                                     (iPTR 0))), addr:$dst)]>,
12838                       T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12839
12840def : Pat<(i16 (bitconvert FR16X:$src)),
12841          (i16 (EXTRACT_SUBREG
12842                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12843                sub_16bit))>;
12844def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12845          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12846
12847// Allow "vmovw" to use GR64
12848let hasSideEffects = 0 in {
12849  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12850                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
12851  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12852                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
12853}
12854}
12855
12856// Convert 16-bit float to i16/u16
12857multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12858                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12859                          AVX512VLVectorVTInfo _Dst,
12860                          AVX512VLVectorVTInfo _Src,
12861                          X86SchedWriteWidths sched> {
12862  let Predicates = [HasFP16] in {
12863    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12864                            OpNode, MaskOpNode, sched.ZMM>,
12865             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
12866                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12867  }
12868  let Predicates = [HasFP16, HasVLX] in {
12869    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12870                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12871    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12872                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12873  }
12874}
12875
12876// Convert 16-bit float to i16/u16 with truncation
12877multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12878                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12879                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
12880                           X86SchedWriteWidths sched> {
12881  let Predicates = [HasFP16] in {
12882    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
12883                            OpNode, MaskOpNode, sched.ZMM>,
12884             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
12885                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12886  }
12887  let Predicates = [HasFP16, HasVLX] in {
12888    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
12889                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
12890    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
12891                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
12892  }
12893}
12894
12895defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
12896                                X86cvtp2UIntRnd, avx512vl_i16_info,
12897                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12898                                T_MAP5, EVEX_CD8<16, CD8VF>;
12899defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
12900                                X86VUintToFpRnd, avx512vl_f16_info,
12901                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12902                                T_MAP5, XD, EVEX_CD8<16, CD8VF>;
12903defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
12904                                X86cvttp2si, X86cvttp2siSAE,
12905                                avx512vl_i16_info, avx512vl_f16_info,
12906                                SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12907defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
12908                                X86cvttp2ui, X86cvttp2uiSAE,
12909                                avx512vl_i16_info, avx512vl_f16_info,
12910                                SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
12911defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
12912                                X86cvtp2IntRnd, avx512vl_i16_info,
12913                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
12914                                T_MAP5, PD, EVEX_CD8<16, CD8VF>;
12915defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
12916                                X86VSintToFpRnd, avx512vl_f16_info,
12917                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
12918                                T_MAP5, XS, EVEX_CD8<16, CD8VF>;
12919
12920// Convert Half to Signed/Unsigned Doubleword
12921multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12922                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12923                           X86SchedWriteWidths sched> {
12924  let Predicates = [HasFP16] in {
12925    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12926                            MaskOpNode, sched.ZMM>,
12927             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
12928                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12929  }
12930  let Predicates = [HasFP16, HasVLX] in {
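    // Only the low 4 of the 8 f16 source elements are used by the 128-bit form
    // below, hence the f64mem memory operand and the "{1to4}" broadcast string.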
12931    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12932                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12933    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12934                               MaskOpNode, sched.YMM>, EVEX_V256;
12935  }
12936}
12937
12938// Convert Half to Signed/Unsigned Doubleword with truncation
12939multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12940                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12941                            X86SchedWriteWidths sched> {
12942  let Predicates = [HasFP16] in {
12943    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
12944                            MaskOpNode, sched.ZMM>,
12945             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
12946                                OpNodeRnd, sched.ZMM>, EVEX_V512;
12947  }
12948  let Predicates = [HasFP16, HasVLX] in {
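    // As in avx512_cvtph2dq above, the 128-bit form uses only the low 4 f16
    // source elements, hence f64mem and "{1to4}".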
12949    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
12950                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
12951    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
12952                               MaskOpNode, sched.YMM>, EVEX_V256;
12953  }
12954}
12955
12956
12957defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
12958                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
12959                                 EVEX_CD8<16, CD8VH>;
12960defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
12961                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
12962                                 EVEX_CD8<16, CD8VH>;
12963
12964defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
12965                                X86cvttp2si, X86cvttp2siSAE,
12966                                SchedWriteCvtPS2DQ>, T_MAP5, XS,
12967                                EVEX_CD8<16, CD8VH>;
12968
12969defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
12970                                 X86cvttp2ui, X86cvttp2uiSAE,
12971                                 SchedWriteCvtPS2DQ>, T_MAP5,
12972                                 EVEX_CD8<16, CD8VH>;
12973
12974// Convert Half to Signed/Unsigned Quadword
12975multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12976                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12977                           X86SchedWriteWidths sched> {
12978  let Predicates = [HasFP16] in {
12979    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
12980                            MaskOpNode, sched.ZMM>,
12981             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
12982                               OpNodeRnd, sched.ZMM>, EVEX_V512;
12983  }
12984  let Predicates = [HasFP16, HasVLX] in {
12985    // The broadcast string is specified explicitly, since only 2 elements of
12986    // the v8f16x_info source are used.
12987    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
12988                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
12989                               EVEX_V128;
12990    // The broadcast string is specified explicitly, since only 4 elements of
12991    // the v8f16x_info source are used.
12992    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
12993                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
12994                               EVEX_V256;
12995  }
12996}
12997
12998// Convert Half to Signed/Unsigned Quadword with truncation
12999multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13000                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13001                            X86SchedWriteWidths sched> {
13002  let Predicates = [HasFP16] in {
13003    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13004                            MaskOpNode, sched.ZMM>,
13005             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13006                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13007  }
13008  let Predicates = [HasFP16, HasVLX] in {
13009    // The broadcast string is specified explicitly, since only 2 elements of
13010    // the v8f16x_info source are used.
13011    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13012                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13013    // The broadcast string is specified explicitly, since only 4 elements of
13014    // the v8f16x_info source are used.
13015    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13016                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13017  }
13018}
13019
13020defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13021                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13022                                 EVEX_CD8<16, CD8VQ>;
13023
13024defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13025                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
13026                                 EVEX_CD8<16, CD8VQ>;
13027
13028defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13029                                 X86cvttp2si, X86cvttp2siSAE,
13030                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13031                                 EVEX_CD8<16, CD8VQ>;
13032
13033defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13034                                 X86cvttp2ui, X86cvttp2uiSAE,
13035                                 SchedWriteCvtPS2DQ>, T_MAP5, PD,
13036                                 EVEX_CD8<16, CD8VQ>;
13037
13038// Convert Signed/Unsigned Quadword to Half
13039multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13040                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13041                           X86SchedWriteWidths sched> {
13042  // We need "x"/"y"/"z" suffixes in order to distinguish between the 128-, 256-
13043  // and 512-bit memory forms of these instructions in the asm parser, since they
13044  // all have the same destination type, 'v8f16x_info'. The broadcast string is
13045  // specified explicitly for the same reason (see the sketch after this multiclass).
13046  let Predicates = [HasFP16] in {
13047    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13048                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13049             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13050                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13051  }
13052  let Predicates = [HasFP16, HasVLX] in {
13053    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13054                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13055                               i128mem, VK2WM>, EVEX_V128;
13056    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13057                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13058                               i256mem, VK4WM>, EVEX_V256;
13059  }
13060
13061  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13062                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13063                  VR128X:$src), 0, "att">;
13064  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13065                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13066                  VK2WM:$mask, VR128X:$src), 0, "att">;
13067  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13068                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13069                  VK2WM:$mask, VR128X:$src), 0, "att">;
13070  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13071                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13072                  i64mem:$src), 0, "att">;
13073  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13074                  "$dst {${mask}}, ${src}{1to2}}",
13075                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13076                  VK2WM:$mask, i64mem:$src), 0, "att">;
13077  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13078                  "$dst {${mask}} {z}, ${src}{1to2}}",
13079                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13080                  VK2WM:$mask, i64mem:$src), 0, "att">;
13081
13082  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13083                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13084                  VR256X:$src), 0, "att">;
13085  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13086                  "$dst {${mask}}, $src}",
13087                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13088                  VK4WM:$mask, VR256X:$src), 0, "att">;
13089  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13090                  "$dst {${mask}} {z}, $src}",
13091                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13092                  VK4WM:$mask, VR256X:$src), 0, "att">;
13093  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13094                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13095                  i64mem:$src), 0, "att">;
13096  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13097                  "$dst {${mask}}, ${src}{1to4}}",
13098                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13099                  VK4WM:$mask, i64mem:$src), 0, "att">;
13100  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13101                  "$dst {${mask}} {z}, ${src}{1to4}}",
13102                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13103                  VK4WM:$mask, i64mem:$src), 0, "att">;
13104
13105  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13106                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13107                  VR512:$src), 0, "att">;
13108  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13109                  "$dst {${mask}}, $src}",
13110                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13111                  VK8WM:$mask, VR512:$src), 0, "att">;
13112  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13113                  "$dst {${mask}} {z}, $src}",
13114                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13115                  VK8WM:$mask, VR512:$src), 0, "att">;
13116  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13117                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13118                  i64mem:$src), 0, "att">;
13119  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13120                  "$dst {${mask}}, ${src}{1to8}}",
13121                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13122                  VK8WM:$mask, i64mem:$src), 0, "att">;
13123  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13124                  "$dst {${mask}} {z}, ${src}{1to8}}",
13125                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13126                  VK8WM:$mask, i64mem:$src), 0, "att">;
13127}
13128
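// Sketch of the "x"/"y"/"z" disambiguation mentioned above (illustrative only;
// AT&T spellings assumed): every form writes an xmm destination, so a memory
// source does not imply its own width and the suffix selects it, e.g.
//   vcvtqq2phx (%rax), %xmm0    // 2 x i64 source
//   vcvtqq2phy (%rax), %xmm0    // 4 x i64 source
//   vcvtqq2phz (%rax), %xmm0    // 8 x i64 source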
13129defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13130                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
13131                            EVEX_CD8<64, CD8VF>;
13132
13133defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13134                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
13135                            EVEX_CD8<64, CD8VF>;
13136
13137// Convert scalar Half to Signed/Unsigned 32-/64-bit Integer
13138defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13139                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13140                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13141defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13142                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13143                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13144defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13145                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13146                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13147defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13148                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13149                                   T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
13150
13151defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13152                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13153                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13154defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13155                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13156                        "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13157defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13158                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13159                        "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
13160defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13161                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13162                        "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
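
// Illustrative scalar forms (AT&T syntax, operand printing assumed):
//   vcvtsh2si   %xmm1, %eax     // f16 -> signed i32
//   vcvttsh2usi (%rax), %rcx    // f16 from memory -> unsigned i64, truncating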
13163
13164let Predicates = [HasFP16] in {
13165  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13166                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13167                                   T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13168  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13169                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13170                                   T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13171  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13172                                    v8f16x_info, i32mem, loadi32,
13173                                    "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
13174  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13175                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13176                                    T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
13177  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13178              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13179
13180  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13181              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13182
13183
13184  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13185            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13186  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13187            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13188
13189  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13190            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13191  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13192            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13193
13194  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13195            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13196  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13197            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13198
13199  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13200            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13201  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13202            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13203
13204  // Patterns used to match vcvtsi2sh intrinsic sequences from clang that
13205  // would otherwise produce unnecessary vmovsh instructions.
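  // Roughly, each pattern below folds a DAG of the form (illustrative)
  //   (X86Movsh $dst, (scalar_to_vector (any_sint_to_fp GR32:$src)))
  // directly into the corresponding *_Int instruction.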
13206  def : Pat<(v8f16 (X86Movsh
13207                     (v8f16 VR128X:$dst),
13208                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13209            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13210
13211  def : Pat<(v8f16 (X86Movsh
13212                     (v8f16 VR128X:$dst),
13213                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13214            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13215
13216  def : Pat<(v8f16 (X86Movsh
13217                     (v8f16 VR128X:$dst),
13218                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13219            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13220
13221  def : Pat<(v8f16 (X86Movsh
13222                     (v8f16 VR128X:$dst),
13223                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13224            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13225
13226  def : Pat<(v8f16 (X86Movsh
13227                     (v8f16 VR128X:$dst),
13228                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13229            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13230
13231  def : Pat<(v8f16 (X86Movsh
13232                     (v8f16 VR128X:$dst),
13233                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13234            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13235
13236  def : Pat<(v8f16 (X86Movsh
13237                     (v8f16 VR128X:$dst),
13238                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13239            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13240
13241  def : Pat<(v8f16 (X86Movsh
13242                     (v8f16 VR128X:$dst),
13243                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13244            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13245} // Predicates = [HasFP16]
13246
13247let Predicates = [HasFP16, HasVLX] in {
13248  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13249  // patterns have been disabled with null_frag.
13250  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13251            (VCVTQQ2PHZ256rr VR256X:$src)>;
13252  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13253                           VK4WM:$mask),
13254            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13255  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13256                           VK4WM:$mask),
13257            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13258
13259  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13260            (VCVTQQ2PHZ256rm addr:$src)>;
13261  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13262                           VK4WM:$mask),
13263            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13264  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13265                           VK4WM:$mask),
13266            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13267
13268  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13269            (VCVTQQ2PHZ256rmb addr:$src)>;
13270  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13271                           (v8f16 VR128X:$src0), VK4WM:$mask),
13272            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13273  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13274                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13275            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13276
13277  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13278            (VCVTQQ2PHZ128rr VR128X:$src)>;
13279  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13280                           VK2WM:$mask),
13281            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13282  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13283                           VK2WM:$mask),
13284            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13285
13286  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13287            (VCVTQQ2PHZ128rm addr:$src)>;
13288  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13289                           VK2WM:$mask),
13290            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13291  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13292                           VK2WM:$mask),
13293            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13294
13295  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13296            (VCVTQQ2PHZ128rmb addr:$src)>;
13297  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13298                           (v8f16 VR128X:$src0), VK2WM:$mask),
13299            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13300  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13301                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13302            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13303
13304  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13305  // patterns have been disabled with null_frag.
13306  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13307            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13308  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13309                           VK4WM:$mask),
13310            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13311  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13312                           VK4WM:$mask),
13313            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13314
13315  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13316            (VCVTUQQ2PHZ256rm addr:$src)>;
13317  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13318                           VK4WM:$mask),
13319            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13320  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13321                           VK4WM:$mask),
13322            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13323
13324  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13325            (VCVTUQQ2PHZ256rmb addr:$src)>;
13326  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13327                           (v8f16 VR128X:$src0), VK4WM:$mask),
13328            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13329  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13330                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13331            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13332
13333  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13334            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13335  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13336                           VK2WM:$mask),
13337            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13338  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13339                           VK2WM:$mask),
13340            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13341
13342  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13343            (VCVTUQQ2PHZ128rm addr:$src)>;
13344  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13345                           VK2WM:$mask),
13346            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13347  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13348                           VK2WM:$mask),
13349            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13350
13351  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13352            (VCVTUQQ2PHZ128rmb addr:$src)>;
13353  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13354                           (v8f16 VR128X:$src0), VK2WM:$mask),
13355            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13356  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13357                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13358            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13359}
13360
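// Complex FP16 FMA (vfmaddcph/vfcmaddcph). Each complex value is a pair of f16
// elements, i.e. 32 bits, which is why f32 vector types are used throughout.
// $src1 is tied to $dst as the accumulator, and @earlyclobber additionally
// keeps $dst from being allocated to the same register as the other sources
// (our reading of the restriction on these instructions; the constraint below
// is the authoritative statement here).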
13361let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13362  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
13363    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13364            (ins _.RC:$src2, _.RC:$src3),
13365            OpcodeStr, "$src3, $src2", "$src2, $src3",
13366            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;
13367
13368    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13369            (ins _.RC:$src2, _.MemOp:$src3),
13370            OpcodeStr, "$src3, $src2", "$src2, $src3",
13371            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;
13372
13373    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13374            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13375            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13376            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
13377  }
13378} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13379
13380multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13381                                 X86VectorVTInfo _> {
13382  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13383  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13384          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13385          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13386          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13387          EVEX, VVVV, EVEX_B, EVEX_RC;
13388}
13389
13390
13391multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13392  let Predicates = [HasFP16] in {
13393    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13394                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13395                      EVEX_V512, Sched<[WriteFMAZ]>;
13396  }
13397  let Predicates = [HasVLX, HasFP16] in {
13398    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13399    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13400  }
13401}
13402
13403multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13404                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13405  let Predicates = [HasFP16] in {
13406    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13407                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13408                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13409                                       "", "@earlyclobber $dst">, EVEX_V512;
13410  }
13411  let Predicates = [HasVLX, HasFP16] in {
13412    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13413                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13414    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13415                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13416  }
13417}
13418
13419
13420let Uses = [MXCSR] in {
13421  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13422                                    T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13423  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13424                                    T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13425
13426  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13427                                         x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
13428  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13429                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
13430}
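
// Illustrative packed complex forms (AT&T syntax, spellings assumed); the
// destination doubles as the accumulator and the usual EVEX masking applies:
//   vfmaddcph %zmm3, %zmm2, %zmm1 {%k1}
//   vfcmulcph (%rax){1to16}, %zmm2, %zmm1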
13431
13432
13433multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13434                                   bit IsCommutable> {
13435  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13436    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13437                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13438                        "$src3, $src2", "$src2, $src3",
13439                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13440                        Sched<[WriteFMAX]>;
13441    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13442                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13443                        "$src3, $src2", "$src2, $src3",
13444                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13445                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13446    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13447                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13448                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13449                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13450                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13451  }
13452}
13453
13454multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13455                                     SDNode OpNodeRnd, bit IsCommutable> {
13456  let Predicates = [HasFP16] in {
13457    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13458                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13459                        "$src2, $src1", "$src1, $src2",
13460                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13461                        IsCommutable, IsCommutable, IsCommutable,
13462                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13463    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13464                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13465                        "$src2, $src1", "$src1, $src2",
13466                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13467                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13468                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13469    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13470                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13471                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13472                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13473                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13474                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13475  }
13476}
13477
13478let Uses = [MXCSR] in {
13479  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13480                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13481  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13482                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
13483
13484  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13485                                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13486  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13487                                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
13488}
13489